collector.py 8.69 KB
Newer Older
1
import threading
2
import queue
3
import logging
4
import os.path
Gerion Entrup's avatar
Gerion Entrup committed
5
import retrieval
6
import musicbrainzngs
7

8
from mbdata.models import ArtistCredit, ArtistCreditName, Artist, Release, ReleaseGroup, Medium, Track, MediumFormat
9

10
from retrieval import Entity, Filter
Gerion Entrup's avatar
Gerion Entrup committed
11

12
from model import Recording
13
from utils import pairwise
14

15
# Bounded work queue (at most 10 pending items). Collector.run() takes
# (mbid, path) tuples from it and calls task_done() for each one; a blocking
# put() waits while the queue is full.
Paths = queue.Queue(maxsize=10)
16
# Module-level re-entrant lock. Nothing in the code visible in this module
# acquires it -- presumably it is shared with other modules that import it;
# verify against the callers.
lock = threading.RLock()
17

18

19
class Collector(threading.Thread):
    """
    Collect tags and write them to the database.

    In threaded mode (the default) the collector is a thread whose ``run``
    loop takes ``(mbid, path)`` tuples from the module-level ``Paths`` queue
    and creates a ``Recording`` for each of them through ``retrieval``.
    ``get_unthreaded_instance`` builds an instance that is not initialised
    as a thread and carries a caller-supplied session and logger.

    The ``_*_mapping`` functions are deliberately plain functions without
    ``self``: they are stored in ``_structure`` while the class body is
    executed and handed to ``retrieval.init``.
    NOTE(review): judging by the signatures, ``retrieval`` calls a mapping
    with the entity first, then the web result (only if ``web`` is set),
    then the values named in ``Entity.parameter`` -- confirm against the
    ``retrieval`` module.
    """

    def __init__(self, session_fac, threaded=True):
        """
        Register the entity structure with ``retrieval``.

        session_fac -- passed through unchanged to ``retrieval.init``
        threaded    -- if true, initialise the ``threading.Thread`` base so
                       the collector can be started; if false,
                       ``Thread.__init__`` is skipped and the instance
                       cannot be started as a thread
        """
        retrieval.init(Collector._structure, session_fac)
        if threaded:
            # ``run`` is overridden, so a ``target`` would never be called.
            super().__init__()
        # Provide a logger in both modes; ``get_unthreaded_instance``
        # replaces it with the caller's logger.
        self._logger = logging.getLogger('collector')

    @classmethod
    def get_unthreaded_instance(cls, session, logger):
        """
        Return a collector that is not set up as a thread.

        The instance is created with ``session_fac=None`` and
        ``threaded=False``; ``session`` and ``logger`` are stored on it as
        ``_session`` and ``_logger``.
        """
        instance = cls(None, threaded=False)
        instance._session = session
        instance._logger = logger
        return instance

    def run(self):
        """
        Process ``(mbid, path)`` items from ``Paths`` forever.

        For each item a ``Recording`` is created and committed through
        ``retrieval``. A ``musicbrainzngs.WebServiceError`` is logged and
        the loop continues; any other exception propagates and ends the
        thread, but the item is still marked as done so that
        ``Paths.join()`` cannot hang on it.
        """
        # NOTE: the loop has no exit condition. The original code had an
        # unreachable ``self._session.close()`` after it; ``_session`` is
        # only ever assigned by ``get_unthreaded_instance``.
        while True:
            mbid, path = Paths.get()
            try:
                self._logger.info("Adding file " +
                                  "{} to the database.".format(path))
                retrieval.create(Recording, mbid, path)
                retrieval.commit()
            except musicbrainzngs.WebServiceError as exc:
                self._logger.error("Could not connect to Musicbrainz. "
                                   "Path: {}, Request: {}".format(path, exc))
            finally:
                Paths.task_done()

    def _release_mapping(release, result, mbid):
        """Fill ``release`` from the release dict ``result``."""
        # non nullable attributes
        release.gid = result['id']
        release.name = result['title']
        release.artist_credit = retrieval.create(
            ArtistCredit,
            result['artist-credit'],
            result['artist-credit-phrase'])
        release.release_group = retrieval.create(
            ReleaseGroup,
            result['release-group']['id'])

        # nullable attributes
        if 'quality' in result:
            release.quality = retrieval.map_quality(result['quality'])

        # extended mapping: one Medium per entry of the medium list
        for medium in result['medium-list']:
            retrieval.create(Medium, release, medium['position'], medium)

    def _recording_mapping(recording, result, mbid, path):
        """
        Fill ``recording`` from the recording dict ``result``.

        ``mbid`` is stored as ``fgid`` and ``path`` as ``path``; ``ftype``
        is the file extension of ``path`` without the leading dot.
        """
        # non nullable attributes
        recording.gid = result['id']
        recording.name = result['title']
        recording.fgid = mbid
        recording.path = path
        recording.ftype = os.path.splitext(path)[1][1:]
        recording.artist_credit = retrieval.create(
            ArtistCredit,
            result['artist-credit'],
            result['artist-credit-phrase'])

        # nullable attributes
        if 'length' in result:
            recording.length = result['length']

        # extended mapping
        for releasedata in result['release-list']:
            release = retrieval.create(Release, releasedata['id'])
            # Find track in release.
            # This is clearly a workaround and only works efficiently
            # because of caching. Correct way would be to fetch all tracks
            # directly, but the musicbrainz api offers no way to do this.
            mediumlist = retrieval.get_table_by_id(
                releasedata['id'], Release)['medium-list']
            for medium in mediumlist:
                for track in medium['track-list']:
                    if track['recording']['id'] == result['id']:
                        retrieval.create(Track, track['id'], release,
                                         recording, medium, track)

    def _artist_mapping(artist, result, mbid):
        """Fill ``artist`` from the artist dict ``result``."""
        # non nullable attributes
        artist.gid = result['id']
        artist.name = result['name']
        artist.sort_name = result['sort-name']

    def _release_group_mapping(release_group, result, mbid):
        """Fill ``release_group`` from the release group dict ``result``."""
        release_group.gid = result['id']
        release_group.name = result['title']
        release_group.artist_credit = retrieval.create(
            ArtistCredit,
            result['artist-credit'],
            result['artist-credit-phrase'])

    def _artist_credit_mapping(artist_credit, acresult, acphrase):
        """
        Fill a new ``artist_credit`` and create its ArtistCreditNames.

        acresult -- artist credit list, see the comment below
        acphrase -- stored as the name of the artist credit
        """
        # non nullable attributes
        artist_credit.name = acphrase
        # acresult is of type ["artist1", "feat.", "artist2"]
        artist_credit.artist_count = (len(acresult) + 1) // 2

        # nullable attributes
        artist_credit.ref_count = 1

        # extended mapping
        # The appended '' gives the last artist an empty join phrase.
        # NOTE(review): this relies on utils.pairwise yielding
        # non-overlapping (artist entry, join phrase) pairs -- confirm.
        for idx, (data, joinphrase) in enumerate(pairwise(acresult + [''])):
            # NOTE: this writes into the dict taken from ``acresult``.
            data['joinphrase'] = joinphrase
            retrieval.create(ArtistCreditName,
                             artist_credit,
                             idx + 1,  # SQL IDs begin with 1
                             data)

    def _artist_credit_r_mapping(artist_credit, acresult, acphrase):
        """
        Count one more reference to ``artist_credit``.

        Registered as ``reverse_mapping`` of ArtistCredit. ``acresult`` and
        ``acphrase`` are unused; they are listed in the same order as in
        the entity's ``parameter`` tuple.
        """
        artist_credit.ref_count += 1

    def _artist_credit_name_mapping(acn, artist_credit, position, data):
        """
        Fill the artist credit name ``acn`` from the credit entry ``data``.

        ``data`` must contain ``'artist'`` and ``'joinphrase'``; an optional
        ``'name'`` overrides the artist's own name.
        """
        # non nullable attributes
        acn.artist_credit = artist_credit
        acn.position = position
        acn.artist = retrieval.create(Artist, data['artist']['id'])
        acn.name = data['name'] if 'name' in data else data['artist']['name']

        acn.join_phrase = data['joinphrase']

    def _medium_mapping(medium, release, position, medium_data):
        """
        Fill ``medium`` from the medium dict ``medium_data``.

        The position is read from ``medium_data``; the callers in this
        class pass that same value as ``position``.
        """
        medium.release = release
        medium.position = medium_data['position']
        medium.track_count = medium_data['track-count']
        if 'format' in medium_data:
            medium.format = retrieval.create(MediumFormat,
                                             medium_data['format'])

    def _medium_format_mapping(medium_format, name):
        """Fill ``medium_format`` with ``name`` and a generated gid."""
        medium_format.name = name
        # This is clearly a workaround because the musicbrainz-API does not
        # allow to retrieve the gid of the medium
        medium_format.gid = retrieval.fake_id('mediumformat', name)

    def _track_mapping(track, mbid, release, recording, medium_data, data):
        """
        Fill ``track`` from the track dict ``data``.

        Name and artist credit are taken over from ``recording``; the
        medium is created (or looked up) from ``medium_data``.
        """
        track.gid = data['id']
        track.position = data['position']
        track.number = data['number']
        track.recording = recording
        track.name = recording.name
        track.medium = retrieval.create(Medium, release,
                                        medium_data['position'], medium_data)
        track.artist_credit = recording.artist_credit
        # The track shares the recording's artist credit, so count the
        # additional reference.
        track.artist_credit.ref_count += 1
        if 'length' in data:
            track.length = data['length']

    # Entity description handed to ``retrieval.init`` in ``__init__``:
    # for every model class the parameter names, the filter used to query
    # for it, the web lookup key (``None`` where there is none) and the
    # mapping function(s) defined above.
    _structure = {
        Release: Entity(parameter=('mbid',),
                        query_filter=Filter(gid='mbid'),
                        web='mbid',
                        mapping=_release_mapping),
        Recording: Entity(parameter=('mbid', 'path'),
                          query_filter=Filter(gid='mbid'),
                          web='mbid',
                          mapping=_recording_mapping),
        Artist: Entity(parameter=('mbid',),
                       query_filter=Filter(gid='mbid'),
                       web='mbid',
                       mapping=_artist_mapping),
        ReleaseGroup: Entity(parameter=('mbid',),
                             query_filter=Filter(gid='mbid'),
                             web='mbid',
                             mapping=_release_group_mapping),
        ArtistCredit: Entity(parameter=('acresult', 'acphrase'),
                             query_filter=Filter(name='acphrase'),
                             web=None,
                             mapping=_artist_credit_mapping,
                             reverse_mapping=_artist_credit_r_mapping),
        ArtistCreditName: Entity(parameter=('artist_credit', 'position',
                                            'data'),
                                 query_filter=Filter(artist_credit='artist_credit',
                                                     position='position'),
                                 web=None,
                                 mapping=_artist_credit_name_mapping),
        Medium: Entity(parameter=('release', 'position', 'medium_data'),
                       query_filter=Filter(release='release',
                                           position='position'),
                       web=None,
                       mapping=_medium_mapping),
        MediumFormat: Entity(parameter=('name',),
                             query_filter=Filter(name='name'),
                             web=None,
                             mapping=_medium_format_mapping),
        Track: Entity(parameter=('mbid', 'release', 'recording',
                                 'medium_data', 'data'),
                      query_filter=Filter(gid='mbid'),
                      web=None,
                      mapping=_track_mapping)}