collector.py 7.7 KB
Newer Older
1
import itertools
import logging
import os
import pprint
import queue
import threading
from collections import namedtuple

import musicbrainzngs
import mutagen
from musicbrainzngs import WebServiceError
from sqlalchemy import and_
from sqlalchemy.orm import Session
from mbdata.models import ArtistCredit, ArtistCreditName, Artist, Release, ReleaseGroup, Medium, Track, MediumFormat

from model import Recording
from utils import pairwise
from fetcher import Fetcher
18

19
# Bounded work queue read by Collector.run(); every entry is an
# (action, data) tuple and at most ten entries may be pending.
Paths = queue.Queue(maxsize=10)

# Action codes used as the first element of a queue entry.
NEW, DEFECT, UPDATE = range(3)

# Payload of a NEW entry: run() reads its ``mbid`` and ``path`` fields.
NewData = namedtuple('NewData', 'mbid path')
# Presumably the payload of a DEFECT entry -- confirm against the producer.
DefectData = namedtuple('DefectData', 'mbid table')
23

24
class Collector(threading.Thread):
25
    """
    Collects tags and writes them to the database.
    """

29
    def __init__(self, session_fac):
        """
        Set up the worker thread.

        session_fac -- callable returning a new database session. It is
                       called once here and the resulting session is used by
                       every method of this object.
        """
        # run() is overridden, so Thread never invokes a ``target``. Passing
        # the thread object itself (which is not callable) only stored a
        # useless self-reference.
        super().__init__()
        self._session = session_fac()
        self.fetcher = Fetcher()
        self.log = logging.getLogger('collector')
34

35
    def run(self):
        """
        Serve the ``Paths`` queue forever.

        Every queue entry is an ``(action, data)`` tuple. ``NEW`` entries are
        fetched via fetch_recording() and committed; ``DEFECT`` and ``UPDATE``
        are only logged so far. An error while handling one entry is logged
        and the session is rolled back, so a single bad entry does not kill
        the worker. ``task_done()`` is called for every entry, successful or
        not, so ``Paths.join()`` cannot hang on a failed entry. The session
        is closed if the loop is ever left.
        """
        try:
            while True:
                item = Paths.get()
                try:
                    action, data = item
                    if action == NEW:
                        self.log.info("Adding file {} to the database.".format(data.path))
                        self.fetch_recording(data.mbid, data.path)
                        self._session.commit()
                    elif action == DEFECT:
                        self.log.info("Repair broken entry with mbid {}".format(data))
                        #TODO
                    elif action == UPDATE:
                        self.log.info("Update outdated entry with mbid {}".format(data))
                        #TODO
                    else:
                        self.log.error("Unknow action: {}".format(action))
                except Exception:
                    # Boundary of the worker loop: drop the half-built
                    # transaction and keep serving the queue.
                    self._session.rollback()
                    self.log.error("Failed to process queue entry: {}".format(item), exc_info=True)
                finally:
                    Paths.task_done()
        finally:
            self._session.close()
52

Gerion Entrup's avatar
Gerion Entrup committed
53
    def create_artist_credit(self, acresult, acphrase):
        """
        Return the ArtistCredit named ``acphrase``, creating it when missing.

        An existing credit only gets its ``ref_count`` increased by one. A new
        credit starts with ``ref_count`` 1; its ArtistCreditName objects are
        built from ``acresult`` and added to the session together with it.
        """
        session = self._session
        credit = session.query(ArtistCredit).filter_by(name=acphrase).first()
        if credit is not None:
            credit.ref_count += 1
            return credit

        credit = ArtistCredit()
        credit.name = acphrase
        credit_names = self.create_artist_credit_name(acresult, credit)
        credit.artist_count = len(credit_names)
        credit.ref_count = 1

        session.add(credit)
        for credit_name in credit_names:
            session.add(credit_name)
        return credit

69
70
71
72
73
74
75
76
77
78
79
80
81
    def create_artist_credit_name(self, acresult, artistcredit):
        """
        Build the ArtistCreditName objects belonging to ``artistcredit``.

        ``acresult`` gets an empty string appended and is then walked with
        ``pairwise``; each step is unpacked into an artist entry and a join
        phrase (presumably non-overlapping pairs -- ``pairwise`` lives in
        utils, confirm there). Positions are numbered from 1 in iteration
        order. The objects are returned as a list and are not added to the
        session here.
        """
        credit_names = []
        pairs = pairwise(acresult + [""])
        for position, (entry, phrase) in enumerate(pairs, start=1):
            credit_name = ArtistCreditName()
            credit_name.artist_credit = artistcredit
            credit_name.position = position
            credit_name.artist = self.fetch_artist(entry['artist']['id'])
            # Prefer the credited name, fall back to the artist's own name.
            if 'name' in entry:
                credit_name.name = entry['name']
            else:
                credit_name.name = entry['artist']['name']
            credit_name.join_phrase = phrase
            credit_names.append(credit_name)
        return credit_names

82
83
84
85
86
87
88
    def create_medium(self, mediumdata, release):
        """
        Create one Medium per entry of ``mediumdata`` and attach it to ``release``.

        Position and track count are taken from the entry; the format is only
        set when the entry has a 'format' key. Every medium is added to the
        session. Returns the list of created media.
        """
        created = []
        for entry in mediumdata:
            new_medium = Medium()
            new_medium.release = release
            new_medium.position = entry['position']
            new_medium.track_count = len(entry['track-list'])
            if 'format' in entry:
                new_medium.format = self.create_medium_format(entry['format'])

            self._session.add(new_medium)
            created.append(new_medium)
        return created

    def create_medium_format(self, name):
        """
        Return the MediumFormat called ``name``.

        The first matching row is reused; otherwise a new MediumFormat with
        that name is created and added to the session.
        """
        existing = self._session.query(MediumFormat).filter_by(name=name).first()
        if existing is not None:
            return existing

        created = MediumFormat()
        created.name = name
        self._session.add(created)
        return created

    def create_track(self, trackdata, release, recording, mediumdata):
        """
        Return the Track with gid ``trackdata['id']``, creating it when missing.

        A new track takes position, number and (if present) length from
        ``trackdata``, while name and artist credit are copied from
        ``recording``. Its medium is looked up by ``mediumdata['position']``
        within ``release`` and must match exactly one row. The artist
        credit's ``ref_count`` is increased for the new track.
        """
        session = self._session
        gid = trackdata['id']
        found = session.query(Track).filter_by(gid=gid).first()
        if found is not None:
            return found

        new_track = Track()
        new_track.gid = gid
        new_track.position = trackdata['position']
        new_track.number = trackdata['number']
        new_track.recording = recording
        new_track.name = recording.name

        same_medium = and_(Medium.position == mediumdata['position'],
                           Medium.release == release)
        new_track.medium = session.query(Medium).filter(same_medium).one()

        new_track.artist_credit = recording.artist_credit
        new_track.artist_credit.ref_count += 1
        if 'length' in trackdata:
            new_track.length = trackdata['length']

        session.add(new_track)
        return new_track
121

Gerion Entrup's avatar
Gerion Entrup committed
122
    def fetch_recording(self, mbid, path):
        """
        Return the Recording with gid ``mbid``, fetching it when missing.

        mbid -- MusicBrainz id of the recording
        path -- path of the audio file; stored on a new recording together
                with its file extension (``ftype``)

        An already stored recording is returned unchanged. Otherwise the
        recording is requested through the fetcher, mapped, linked to its
        releases and tracks, and committed. If the web service raises
        ``WebServiceError`` the session is rolled back, the error is logged
        and None is returned.
        """
        recording = self._session.query(Recording).filter_by(gid=mbid).first()
        if recording is not None:
            return recording

        try:
            result = self.fetcher.get_table_by_id(mbid, 'recording')

            #minimal mapping
            recording = Recording()
            recording.gid = result['id']
            recording.name = result['title']
            recording.path = path
            # splitext only inspects the last path component, so a dot in a
            # directory name or a file without extension yields '' instead
            # of a fragment of the path.
            recording.ftype = os.path.splitext(path)[1][1:]
            recording.artist_credit = self.create_artist_credit(result['artist-credit'], result['artist-credit-phrase'])
            if 'length' in result:
                recording.length = result['length']
            self._session.add(recording)

            #extended mapping
            # NOTE(review): a result without 'release-list' is treated as a
            # recording without releases -- confirm that the fetcher may
            # omit the key.
            releases = result['release-list'] if 'release-list' in result else []
            for releasedata in releases:
                release = self.fetch_release(releasedata['id'])
                # find track in release,
                # this is clearly a workaround and only works efficient because of caching.
                # correct way would be to fetch all tracks directly, but the musicbrainz api
                # offers no way to do this.
                mediumlist = self.fetcher.get_table_by_id(releasedata['id'], 'release')['medium-list']
                for medium in mediumlist:
                    for track in medium['track-list']:
                        if track['recording']['id'] == mbid:
                            self.create_track(track, release, recording, medium)
            self._session.commit()
        except musicbrainzngs.WebServiceError as exc:
            self._session.rollback()
            self.log.error("Could not connect to Musicbrainz. Path: {} Request: {}".format(path, exc))
            return None

        return recording


Gerion Entrup's avatar
Gerion Entrup committed
159
    def fetch_release_group(self, mbid):
        """
        Return the ReleaseGroup with gid ``mbid``.

        A stored release group is reused. Otherwise it is requested through
        the fetcher, mapped (gid, name, artist credit) and added to the
        session.
        """
        stored = self._session.query(ReleaseGroup).filter_by(gid=mbid).first()
        if stored is not None:
            return stored

        data = self.fetcher.get_table_by_id(mbid, 'release-group')

        group = ReleaseGroup()
        group.gid = data['id']
        group.name = data['title']
        group.artist_credit = self.create_artist_credit(data['artist-credit'],
                                                        data['artist-credit-phrase'])

        self._session.add(group)
        return group


174
    def fetch_artist(self, mbid):
        """
        Return the Artist with gid ``mbid``.

        A stored artist is reused. Otherwise the artist is requested through
        the fetcher, mapped (gid, name, sort name) and added to the session.
        """
        stored = self._session.query(Artist).filter_by(gid=mbid).first()
        if stored is not None:
            return stored

        data = self.fetcher.get_table_by_id(mbid, 'artist')

        new_artist = Artist()
        new_artist.gid = data['id']
        new_artist.name = data['name']
        new_artist.sort_name = data['sort-name']

        # register the new row with the session
        self._session.add(new_artist)
        return new_artist

Gerion Entrup's avatar
Gerion Entrup committed
188
    def fetch_release(self, mbid):
        """
        Return the Release with gid ``mbid``.

        A stored release is reused. Otherwise the release is requested
        through the fetcher, mapped (gid, name, artist credit, release group)
        and added to the session; afterwards its media are created from the
        result's 'medium-list'.
        """
        stored = self._session.query(Release).filter_by(gid=mbid).first()
        if stored is not None:
            return stored

        data = self.fetcher.get_table_by_id(mbid, 'release')

        # minimal mapping
        new_release = Release()
        new_release.gid = data['id']
        new_release.name = data['title']
        new_release.artist_credit = self.create_artist_credit(data['artist-credit'],
                                                              data['artist-credit-phrase'])
        new_release.release_group = self.fetch_release_group(data['release-group']['id'])
        self._session.add(new_release)

        # extended mapping
        self.create_medium(data['medium-list'], new_release)
        return new_release

206