collector.py 7.68 KB
Newer Older
1
import itertools
import logging
import os
import pprint
import queue
import threading
from collections import namedtuple

import musicbrainzngs
import mutagen
from musicbrainzngs import WebServiceError
from sqlalchemy import and_
from sqlalchemy.orm import Session

from mbdata.models import Recording, ArtistCredit, ArtistCreditName, Artist, Release, ReleaseGroup, Medium, Track, MediumFormat
from fetcher import Fetcher
from utils import pairwise
17

18
# Work queue read by Collector.run(); every item is an (action, data) tuple.
# The queue holds at most 10 pending items.
Paths = queue.Queue(maxsize=10)

# Action codes carried as the first element of a queue item.
NEW, DEFECT, UPDATE = range(3)

# Payload types carried as the second element of a queue item.
NewData = namedtuple('NewData', 'mbid path')
DefectData = namedtuple('DefectData', 'mbid table')
22

23
class Collector(threading.Thread):
    """
    Worker thread that collects tags and writes them to the database.

    The thread takes ``(action, data)`` tuples from the module-level
    ``Paths`` queue. For ``NEW`` items the recording named by ``data.mbid``
    is fetched through ``self.fetcher`` and stored together with its artist
    credits, releases, release groups, media and tracks. ``DEFECT`` and
    ``UPDATE`` items are only logged so far.
    """

    def __init__(self, session_fac):
        """
        Set up the session, the fetcher and the logger.

        session_fac -- callable without arguments that returns the database
                       session used for all queries and writes of this thread.
        """
        # run() is overridden, so no ``target`` is handed to Thread: the
        # instance itself is not callable and would only create a
        # self-reference cycle.
        super().__init__()
        self._session = session_fac()
        self.fetcher = Fetcher()
        self.log = logging.getLogger('collector')

    def run(self):
        """
        Process items from ``Paths`` until the thread is torn down.

        Every item that was taken from the queue is acknowledged with
        ``task_done()``, also when handling it fails, so that a
        ``Paths.join()`` cannot hang on a failed item. An unexpected error
        is logged with its traceback and the session is rolled back before
        the next item is taken. The session is closed when the loop is left.
        """
        try:
            while True:
                item = Paths.get()
                try:
                    action, data = item
                    self._dispatch(action, data)
                except Exception:
                    # Thread boundary: keep the worker alive, but never hide
                    # the failure. The rollback makes the session usable again.
                    self.log.exception("Could not process queue item %r", item)
                    self._session.rollback()
                finally:
                    Paths.task_done()
        finally:
            self._session.close()

    def _dispatch(self, action, data):
        """Handle one queue item according to its action code."""
        if action == NEW:
            self.log.info("Adding file %s to the database.", data.path)
            self.fetch_recording(data.mbid, data.path)
            self._session.commit()
        elif action == DEFECT:
            # Log the mbid only; fall back to the raw payload if it has none.
            self.log.info("Repair broken entry with mbid %s", getattr(data, 'mbid', data))
            # TODO
        elif action == UPDATE:
            self.log.info("Update outdated entry with mbid %s", getattr(data, 'mbid', data))
            # TODO
        else:
            self.log.error("Unknown action: %s", action)

    @staticmethod
    def _file_type(path):
        """
        Return the file extension of path without the leading dot.

        An empty string is returned if the last path component has no
        extension; dots in directory names are not taken for an extension.
        """
        return os.path.splitext(path)[1][1:]

    def create_artist_credit(self, acresult, acphrase):
        """
        Return the ArtistCredit named acphrase, creating it if necessary.

        acresult -- 'artist-credit' list of a fetched result.
        acphrase -- 'artist-credit-phrase' of the same result.

        A new credit is added to the session together with its credit names
        and starts with a ref_count of 1. An existing credit gets its
        ref_count incremented.

        NOTE(review): existing credits are matched by name only, so two
        different credits with the same phrase share one row.
        """
        ac = self._session.query(ArtistCredit).filter_by(name=acphrase).first()
        if ac is not None:
            ac.ref_count += 1
            return ac

        ac = ArtistCredit()
        ac.name = acphrase
        acns = self.create_artist_credit_name(acresult, ac)
        ac.artist_count = len(acns)
        ac.ref_count = 1

        self._session.add(ac)
        self._session.add_all(acns)
        return ac

    def create_artist_credit_name(self, acresult, artistcredit):
        """
        Build the ArtistCreditName objects that belong to artistcredit.

        acresult     -- 'artist-credit' list of a fetched result.
        artistcredit -- the ArtistCredit the names are attached to.

        Returns the list of created objects; they are NOT added to the
        session here. Positions are counted from 1.
        """
        acns = []
        # ``pairwise`` comes from the project's utils module. Presumably it
        # yields (artist entry, join phrase) pairs from the alternating list;
        # the appended "" would then be the join phrase of the last artist.
        # TODO confirm against utils.pairwise.
        for artist, joinphrase in pairwise(acresult + [""]):
            acn = ArtistCreditName()
            acn.artist_credit = artistcredit
            acn.position = len(acns) + 1
            acn.artist = self.fetch_artist(artist['artist']['id'])
            # Prefer the name given in the credit over the artist's own name.
            acn.name = artist['name'] if 'name' in artist else artist['artist']['name']
            acn.join_phrase = joinphrase

            acns.append(acn)
        return acns

    def create_medium(self, mediumdata, release):
        """
        Create one Medium per entry of mediumdata and add it to the session.

        mediumdata -- 'medium-list' of a fetched release.
        release    -- the Release the media belong to.

        Returns the list of created Medium objects.
        """
        mediums = []
        for med in mediumdata:
            medium = Medium()
            medium.release = release
            medium.position = med['position']
            medium.track_count = len(med['track-list'])
            if 'format' in med:
                medium.format = self.create_medium_format(med['format'])

            mediums.append(medium)
            self._session.add(medium)
        return mediums

    def create_medium_format(self, name):
        """Return the MediumFormat called name, creating it if necessary."""
        mediumformat = self._session.query(MediumFormat).filter_by(name=name).first()
        if mediumformat is None:
            mediumformat = MediumFormat()
            mediumformat.name = name
            self._session.add(mediumformat)
        return mediumformat

    def create_track(self, trackdata, release, recording, mediumdata):
        """
        Return the Track with the gid of trackdata, creating it if necessary.

        trackdata  -- one entry of a medium's 'track-list'.
        release    -- the Release the track is part of.
        recording  -- the Recording the track refers to; it supplies the
                      name and the artist credit of a new track.
        mediumdata -- the medium entry that contains trackdata.

        The Medium is looked up by its position within release and has to
        exist already, ``one()`` raises otherwise. A new track increments
        the ref_count of the recording's artist credit.
        """
        track = self._session.query(Track).filter_by(gid=trackdata['id']).first()
        if track is not None:
            return track

        track = Track()
        track.gid = trackdata['id']
        track.position = trackdata['position']
        track.number = trackdata['number']
        track.recording = recording
        track.name = recording.name
        track.medium = self._session.query(Medium).filter(
            and_(Medium.position == mediumdata['position'],
                 Medium.release == release)).one()
        track.artist_credit = recording.artist_credit
        track.artist_credit.ref_count += 1
        if 'length' in trackdata:
            track.length = trackdata['length']

        self._session.add(track)
        return track

    def fetch_recording(self, mbid, path):
        """
        Return the Recording with gid mbid, fetching and storing it if needed.

        mbid -- id of the recording.
        path -- path of the file the recording was read from; it is stored
                on a new Recording together with its file extension.

        A new recording is committed together with its releases and tracks.
        If the fetcher raises a WebServiceError, the session is rolled back,
        the error is logged and None is returned.
        """
        recording = self._session.query(Recording).filter_by(gid=mbid).first()
        if recording is not None:
            return recording

        try:
            result = self.fetcher.get_table_by_id(mbid, 'recording')
            recording_id = result['id']

            # minimal mapping
            recording = Recording()
            recording.gid = recording_id
            recording.name = result['title']
            recording.path = path
            recording.ftype = self._file_type(path)
            recording.artist_credit = self.create_artist_credit(result['artist-credit'],
                                                                result['artist-credit-phrase'])
            if 'length' in result:
                recording.length = result['length']
            self._session.add(recording)

            # extended mapping
            releases = result['release-list'] if 'release-list' in result else []
            for releasedata in releases:
                release = self.fetch_release(releasedata['id'])
                # find track in release,
                # this is clearly a workaround and only works efficiently because of caching.
                # The correct way would be to fetch all tracks directly, but the musicbrainz api
                # offers no way to do this.
                mediumlist = self.fetcher.get_table_by_id(releasedata['id'], 'release')['medium-list']
                for medium in mediumlist:
                    for track in medium['track-list']:
                        # Compare with the id of the fetched result: this is
                        # the gid stored above, also if it differs from mbid.
                        if track['recording']['id'] == recording_id:
                            self.create_track(track, release, recording, medium)
            self._session.commit()
        except musicbrainzngs.WebServiceError as exc:
            self._session.rollback()
            self.log.error("Could not connect to Musicbrainz. Path: %s Request: %s", path, exc)
            return None
        return recording

    def fetch_release_group(self, mbid):
        """
        Return the ReleaseGroup with gid mbid, fetching it if necessary.

        A new release group is added to the session, it is not committed here.
        """
        rg = self._session.query(ReleaseGroup).filter_by(gid=mbid).first()
        if rg is None:
            result = self.fetcher.get_table_by_id(mbid, 'release-group')

            rg = ReleaseGroup()
            rg.gid = result['id']
            rg.name = result['title']
            rg.artist_credit = self.create_artist_credit(result['artist-credit'],
                                                         result['artist-credit-phrase'])

            self._session.add(rg)
        return rg

    def fetch_artist(self, mbid):
        """
        Return the Artist with gid mbid, fetching it if necessary.

        A new artist is added to the session, it is not committed here.
        """
        artist = self._session.query(Artist).filter_by(gid=mbid).first()
        if artist is None:
            result = self.fetcher.get_table_by_id(mbid, 'artist')

            artist = Artist()
            artist.gid = result['id']
            artist.name = result['name']
            artist.sort_name = result['sort-name']

            # add to db
            self._session.add(artist)
        return artist

    def fetch_release(self, mbid):
        """
        Return the Release with gid mbid, fetching it if necessary.

        A new release is added to the session together with its artist
        credit, its release group and its media; it is not committed here.
        """
        release = self._session.query(Release).filter_by(gid=mbid).first()
        if release is None:
            result = self.fetcher.get_table_by_id(mbid, 'release')

            # minimal mapping
            release = Release()
            release.gid = result['id']
            release.name = result['title']
            release.artist_credit = self.create_artist_credit(result['artist-credit'],
                                                              result['artist-credit-phrase'])
            release.release_group = self.fetch_release_group(result['release-group']['id'])

            self._session.add(release)

            # extended mapping
            self.create_medium(result['medium-list'], release)
        return release

205