Commit 619d0f3d authored by Gerion Entrup
Browse files

collector: restructuring, cleanup

parent 174d0868
......@@ -17,9 +17,6 @@ from mbdata.models import ArtistCredit, ArtistCreditName, Artist, Release, Relea
from utils import pairwise
Paths = queue.Queue(maxsize=10)
NEW = 0; DEFECT = 1; UPDATE = 2
NewData = namedtuple('NewData', ['mbid', 'path'])
DefectData = namedtuple('DefectData', ['mbid', 'table'])
class Collector(threading.Thread):
"""
......@@ -42,22 +39,18 @@ class Collector(threading.Thread):
def run(self):
while True:
action, data = Paths.get()
if action == NEW:
self._logger.info("Adding file {} to the database.".format(data.path))
self.fetch_recording(data.mbid, data.path)
self._session.commit()
elif action == DEFECT:
self._logger.info("Repair broken entry with mbid {}".format(data))
#TODO
elif action == UPDATE:
self._logger.info("Update outdated entry with mbid {}".format(data))
#TODO
else:
self._logger.error("Unknow action: {}".format(action))
mbid, path = Paths.get()
try:
self._logger.info("Adding file {} to the database.".format(path))
self.fetch_recording(mbid, path)
except musicbrainzngs.WebServiceError as exc:
self._session.rollback()
self._logger.error("Could not connect to Musicbrainz. Path: {} Request: {}".format(path, exc))
self._session.commit()
Paths.task_done()
self._session.close()
def create_artist_credit(self, acresult, acphrase):
ac = self._session.query(ArtistCredit).filter_by(name=acphrase).first()
if ac is None:
......@@ -130,37 +123,31 @@ class Collector(threading.Thread):
def fetch_recording(self, mbid, path):
recording = self._session.query(Recording).filter_by(gid=mbid).first()
if recording is None:
try:
result = fetcher.get_table_by_id(mbid, 'recording')
#minimal mapping
recording = Recording()
recording.gid = result['id']
recording.name = result['title']
recording.path = path
recording.ftype = path.split('.')[-1]
recording.artist_credit = self.create_artist_credit(result['artist-credit'], result['artist-credit-phrase'])
if 'length' in result:
recording.length = result['length']
self._session.add(recording)
#extended mapping
for releasedata in result['release-list']:
release = self.fetch_release(releasedata['id'])
# find track in release,
# this is clearly a workaround and only works efficient because of caching.
# correct way would be to fetch all tracks directly, but the musicbrainz api
# offers no way to do this.
mediumlist = fetcher.get_table_by_id(releasedata['id'], 'release')['medium-list']
for medium in mediumlist:
for track in medium['track-list']:
if track['recording']['id'] == mbid:
self.create_track(track, release, recording, medium)
self._session.commit()
except musicbrainzngs.WebServiceError as exc:
self._session.rollback()
self._logger.error("Could not connect to Musicbrainz. Path: {} Request: {}".format(path, exc))
recording = None
result = fetcher.get_table_by_id(mbid, 'recording')
#minimal mapping
recording = Recording()
recording.gid = result['id']
recording.name = result['title']
recording.path = path
recording.ftype = path.split('.')[-1]
recording.artist_credit = self.create_artist_credit(result['artist-credit'], result['artist-credit-phrase'])
if 'length' in result:
recording.length = result['length']
self._session.add(recording)
#extended mapping
for releasedata in result['release-list']:
release = self.fetch_release(releasedata['id'])
# find track in release,
# this is clearly a workaround and only works efficient because of caching.
# correct way would be to fetch all tracks directly, but the musicbrainz api
# offers no way to do this.
mediumlist = fetcher.get_table_by_id(releasedata['id'], 'release')['medium-list']
for medium in mediumlist:
for track in medium['track-list']:
if track['recording']['id'] == mbid:
self.create_track(track, release, recording, medium)
return recording
......
......@@ -7,7 +7,7 @@ from sqlalchemy.orm import Session
from model import Recording
from utils import pairwise
from collector import Paths, NEW, NewData
from collector import Paths
class Walker(threading.Thread):
def __init__(self, session_fac, sourcedir):
......@@ -36,7 +36,7 @@ class Walker(threading.Thread):
if mut is not None:
mbid = self.get_mbid(mut)
if mbid is not None and mbid not in self._mbids:
Paths.put((NEW, NewData(mbid=mbid, path=os.path.abspath(filepath))))
Paths.put((mbid,os.path.abspath(filepath)))
self._mbids.add(mbid)
def get_mbid(self, mut):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment