Commit 619d0f3d authored by Gerion Entrup
Browse files

collector: restructuring, cleanup

parent 174d0868
...@@ -17,9 +17,6 @@ from mbdata.models import ArtistCredit, ArtistCreditName, Artist, Release, Relea ...@@ -17,9 +17,6 @@ from mbdata.models import ArtistCredit, ArtistCreditName, Artist, Release, Relea
from utils import pairwise from utils import pairwise
Paths = queue.Queue(maxsize=10) Paths = queue.Queue(maxsize=10)
NEW = 0; DEFECT = 1; UPDATE = 2
NewData = namedtuple('NewData', ['mbid', 'path'])
DefectData = namedtuple('DefectData', ['mbid', 'table'])
class Collector(threading.Thread): class Collector(threading.Thread):
""" """
...@@ -42,22 +39,18 @@ class Collector(threading.Thread): ...@@ -42,22 +39,18 @@ class Collector(threading.Thread):
def run(self): def run(self):
while True: while True:
action, data = Paths.get() mbid, path = Paths.get()
if action == NEW: try:
self._logger.info("Adding file {} to the database.".format(data.path)) self._logger.info("Adding file {} to the database.".format(path))
self.fetch_recording(data.mbid, data.path) self.fetch_recording(mbid, path)
self._session.commit() except musicbrainzngs.WebServiceError as exc:
elif action == DEFECT: self._session.rollback()
self._logger.info("Repair broken entry with mbid {}".format(data)) self._logger.error("Could not connect to Musicbrainz. Path: {} Request: {}".format(path, exc))
#TODO self._session.commit()
elif action == UPDATE:
self._logger.info("Update outdated entry with mbid {}".format(data))
#TODO
else:
self._logger.error("Unknow action: {}".format(action))
Paths.task_done() Paths.task_done()
self._session.close() self._session.close()
def create_artist_credit(self, acresult, acphrase): def create_artist_credit(self, acresult, acphrase):
ac = self._session.query(ArtistCredit).filter_by(name=acphrase).first() ac = self._session.query(ArtistCredit).filter_by(name=acphrase).first()
if ac is None: if ac is None:
...@@ -130,37 +123,31 @@ class Collector(threading.Thread): ...@@ -130,37 +123,31 @@ class Collector(threading.Thread):
def fetch_recording(self, mbid, path): def fetch_recording(self, mbid, path):
recording = self._session.query(Recording).filter_by(gid=mbid).first() recording = self._session.query(Recording).filter_by(gid=mbid).first()
if recording is None: if recording is None:
try: result = fetcher.get_table_by_id(mbid, 'recording')
result = fetcher.get_table_by_id(mbid, 'recording')
#minimal mapping
#minimal mapping recording = Recording()
recording = Recording() recording.gid = result['id']
recording.gid = result['id'] recording.name = result['title']
recording.name = result['title'] recording.path = path
recording.path = path recording.ftype = path.split('.')[-1]
recording.ftype = path.split('.')[-1] recording.artist_credit = self.create_artist_credit(result['artist-credit'], result['artist-credit-phrase'])
recording.artist_credit = self.create_artist_credit(result['artist-credit'], result['artist-credit-phrase']) if 'length' in result:
if 'length' in result: recording.length = result['length']
recording.length = result['length'] self._session.add(recording)
self._session.add(recording)
#extended mapping
#extended mapping for releasedata in result['release-list']:
for releasedata in result['release-list']: release = self.fetch_release(releasedata['id'])
release = self.fetch_release(releasedata['id']) # find track in release,
# find track in release, # this is clearly a workaround and only works efficient because of caching.
# this is clearly a workaround and only works efficient because of caching. # correct way would be to fetch all tracks directly, but the musicbrainz api
# correct way would be to fetch all tracks directly, but the musicbrainz api # offers no way to do this.
# offers no way to do this. mediumlist = fetcher.get_table_by_id(releasedata['id'], 'release')['medium-list']
mediumlist = fetcher.get_table_by_id(releasedata['id'], 'release')['medium-list'] for medium in mediumlist:
for medium in mediumlist: for track in medium['track-list']:
for track in medium['track-list']: if track['recording']['id'] == mbid:
if track['recording']['id'] == mbid: self.create_track(track, release, recording, medium)
self.create_track(track, release, recording, medium)
self._session.commit()
except musicbrainzngs.WebServiceError as exc:
self._session.rollback()
self._logger.error("Could not connect to Musicbrainz. Path: {} Request: {}".format(path, exc))
recording = None
return recording return recording
......
...@@ -7,7 +7,7 @@ from sqlalchemy.orm import Session ...@@ -7,7 +7,7 @@ from sqlalchemy.orm import Session
from model import Recording from model import Recording
from utils import pairwise from utils import pairwise
from collector import Paths, NEW, NewData from collector import Paths
class Walker(threading.Thread): class Walker(threading.Thread):
def __init__(self, session_fac, sourcedir): def __init__(self, session_fac, sourcedir):
...@@ -36,7 +36,7 @@ class Walker(threading.Thread): ...@@ -36,7 +36,7 @@ class Walker(threading.Thread):
if mut is not None: if mut is not None:
mbid = self.get_mbid(mut) mbid = self.get_mbid(mut)
if mbid is not None and mbid not in self._mbids: if mbid is not None and mbid not in self._mbids:
Paths.put((NEW, NewData(mbid=mbid, path=os.path.abspath(filepath)))) Paths.put((mbid,os.path.abspath(filepath)))
self._mbids.add(mbid) self._mbids.add(mbid)
def get_mbid(self, mut): def get_mbid(self, mut):
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment