fetcher.py 3.8 KB
Newer Older
1
2
3
4
5
6
7
8
import collections
import logging
import musicbrainzngs
import time
import threading

import settings

9
from mbdata.models import Recording, ReleaseGroup, Artist, Release
Gerion Entrup's avatar
Gerion Entrup committed
10

Gerion Entrup's avatar
Gerion Entrup committed
11
12
""" Fetches the musicdata and caches them. Could only exist once.
Use it through get_table_by_id() and clean_cache().
"""

15

16
# ORM table classes for which a MusicBrainz web lookup is implemented.
web = [Recording, ReleaseGroup, Artist, Release]

# Identify this client to the MusicBrainz web service.
musicbrainzngs.set_useragent("brainzfs", "0.1-alpha",
                             "https://git.finf.uni-hannover.de/Chrysops/brainzfs")

_logger = logging.getLogger('collector.fetcher')
# Held by get_table_by_id() and clean_cache() while they touch the cache.
_lock = threading.Lock()

# Two-level cache: _cache[table][mbid] -> fetched result.
_cache = {}
for table in web:
    _cache[table] = {}
# Records appended by _cache_append() in insertion order; the oldest record
# is on the left and is the first one to be evicted.
_cachedates = collections.deque()

# Value of _time_ms() stored by _delay() at the end of its last call.
_time = 0

31
32

def _get_recording(mbid):
    """Retrieval function for recording.

    Looks the recording up together with its releases and artists. If the
    recording has more than 25 releases, the complete release list is
    fetched separately and replaces the one embedded in the lookup result.

    Arguments:
    mbid -- The MusicBrainz id of the recording.

    Returns the 'recording' dictionary of the musicbrainzngs response.
    """
    res = musicbrainzngs.get_recording_by_id(
        mbid, includes=['releases', 'artists'])['recording']
    total = res['release-count']
    if total > 25:
        # A single browse request returns at most `limit` entries, so page
        # through the results with `offset` until every release is collected.
        releases = []
        while len(releases) < total:
            page = musicbrainzngs.browse_releases(
                recording=mbid, limit=100,
                offset=len(releases)).get('release-list', [])
            if not page:
                # The server returned fewer releases than announced; stop
                # instead of requesting the same empty page forever.
                break
            releases.extend(page)
        res['release-list'] = releases
    return res


def _get_release_group(mbid):
    """Retrieval function for release_group.

    Arguments:
    mbid -- The MusicBrainz id of the release group.

    Returns the 'release-group' dictionary of the musicbrainzngs response,
    requested including its artist credits.
    """
    return musicbrainzngs.get_release_group_by_id(
        mbid, includes=['artist-credits'])['release-group']


def _get_artist(mbid):
    """Retrieval function for artist.

    Arguments:
    mbid -- The MusicBrainz id of the artist.

    Returns the 'artist' dictionary of the musicbrainzngs response.
    """
    return musicbrainzngs.get_artist_by_id(mbid)['artist']


def _get_release(mbid):
    """Retrieval function for release.

    Arguments:
    mbid -- The MusicBrainz id of the release.

    Returns the 'release' dictionary of the musicbrainzngs response,
    requested including artists, media, recordings and release groups.
    """
    return musicbrainzngs.get_release_by_id(
        mbid, includes=['artists',
                        'media',
                        'recordings',
                        'release-groups'])['release']

61
62
63
64
# Dispatch table: ORM table class -> function that fetches one entity of
# that table from the web service by its mbid.
_methods = {
    Recording: _get_recording,
    ReleaseGroup: _get_release_group,
    Artist: _get_artist,
    Release: _get_release,
}
65
66
67


def _time_ms():
    """Return the current time (time.time()) as an int in milliseconds."""
    return int(time.time() * 1000)


def _delay():
    """Honor the MusicBrainz accessing delay. Sleep as long as necessary.

    Ensures that at least 1000 ms lie between the end of the previous call
    and the return of this one, then stores the current time in the
    module-level _time for the next call.
    """
    global _time
    elapsed = _time_ms() - _time
    if elapsed < 1000:
        # _time_ms() is wall-clock based: if the clock was set backwards,
        # elapsed is negative and the raw remainder could be arbitrarily
        # large. Never wait longer than the full one-second interval.
        remaining = min(1000 - elapsed, 1000)
        time.sleep(remaining / 1000)
    _time = _time_ms()


def _cache_append(tablename, mbid, result):
    """Append data to the cache. If the cache limit is reached, delete
    the oldest entry.

    Does no locking itself; get_table_by_id() calls it while holding _lock.

    Arguments:
    tablename -- The ORM table class the result belongs to.
    mbid      -- The id the result was fetched for.
    result    -- The fetched data to store.
    """
    _cache[tablename][mbid] = result
    # Store the timestamp itself (call _time_ms), not the function object,
    # so that clean_cache() can compare it against its age threshold.
    _cachedates.append((_time_ms(), tablename, mbid))
    if len(_cachedates) > settings.fetcher_cache_length:
        _, old_table, old_mbid = _cachedates.popleft()
        del _cache[old_table][old_mbid]


def clean_cache():
    """Clean all outdated entries out of the cache. This function can be
    called at any time, also when the cache is empty.

    An outdated entry is an entry older than settings.fetcher_cache_age
    (multiplied by 1000 to compare it with the millisecond timestamps).
    """
    with _lock:
        threshold = _time_ms() - settings.fetcher_cache_age * 1000
        # _cachedates is ordered oldest-first, so stop at the first entry
        # that is still fresh. Check for emptiness on every iteration: the
        # deque may be empty from the start or be drained by this loop.
        while _cachedates and _cachedates[0][0] < threshold:
            _, old_table, old_mbid = _cachedates.popleft()
            del _cache[old_table][old_mbid]


def get_table_by_id(mbid, tablename):
    """Fetch the data from MusicBrainz. Ask the cache in a transparent way
    whether the data is already available and handle the MusicBrainz timing
    rules.

    Arguments:
    mbid      -- The id that should be fetched.
    tablename -- An ORM table class to specify the table.

    Returns the cached or freshly fetched data for mbid.

    Raises:
    TypeError -- If tablename is not one of the tables in web.
    Errors of the musicbrainzngs lookup (musicbrainzngs.WebServiceError if
    anything goes wrong with the web service) are passed on to the caller.
    """
    if tablename not in web:
        # A bare string cannot be raised; wrap the message in the exception
        # class that the former `raise("...")` produced in practice.
        raise TypeError("Error: no Web lookup possible for " + str(tablename))

    with _lock:
        table_cache = _cache[tablename]
        if mbid in table_cache:
            _logger.debug("asked fetcher for %s", tablename)
            return table_cache[mbid]

        # delay to follow musicbrainz rules
        _delay()
        # request actual data
        result = _methods[tablename](mbid)
        _cache_append(tablename, mbid, result)
        _logger.debug("asked web for %s", tablename)
        return result