Commit 19e9d9b2 authored by Gerion Entrup
Browse files

collector, fetcher: fix bug and reformatting

The bug fix mainly consists of commenting out the MediumFormat code, which
does not work properly without changes to python-musicbrainzngs.
parent 0fd9f6c7
import pprint
import mutagen
import musicbrainzngs
import itertools
import threading
import queue
import logging
import os.path
import musicbrainzngs
from sqlalchemy import and_
from sqlalchemy.orm import Session
from musicbrainzngs import WebServiceError
from collections import namedtuple
from mbdata.models import ArtistCredit, ArtistCreditName, Artist, Release, ReleaseGroup, Medium, Track, MediumFormat
import fetcher
from fetcher import Table
from model import Recording
from mbdata.models import ArtistCredit, ArtistCreditName, Artist, Release, ReleaseGroup, Medium, Track, MediumFormat
from utils import pairwise
# Bounded queue (at most 10 pending entries) of (mbid, path) tuples:
# Walker puts entries on it and Collector.run() takes them off.
Paths = queue.Queue(maxsize=10)
class Collector(threading.Thread):
"""
Collects Tags and write them to the database.
......@@ -29,57 +26,147 @@ class Collector(threading.Thread):
self._session = session_fac()
self._logger = logging.getLogger('collector')
@classmethod
def get_unthreaded_instance(cls, session, logger):
    """Build an instance that uses a caller-supplied session and logger.

    The instance is constructed with no session factory and
    ``threaded=False``; the given session and logger are attached
    afterwards.

    Args:
        session: database session the instance should work on.
        logger: logger the instance should report to.

    Returns:
        The configured instance.
    """
    instance = cls(None, threaded=False)
    instance._session = session
    instance._logger = logger
    # Previously the method fell off the end and returned None.
    return instance
def run(self):
    """Consume ``Paths`` forever, storing one recording per queue entry.

    Each entry is an ``(mbid, path)`` tuple. A MusicBrainz web-service
    failure rolls the session back and is logged; in either case the
    session is committed and the queue entry is marked done.
    """
    # NOTE(review): the close() originally followed the endless loop and
    # could never run; try/finally keeps it reachable if the loop is left
    # through an unexpected exception. Confirm this matches the intent.
    try:
        while True:
            mbid, path = Paths.get()
            try:
                # Each message is logged once (it used to be emitted twice).
                self._logger.info(
                    "Adding file {} to the database.".format(path))
                self.fetch_recording(mbid, path)
            except musicbrainzngs.WebServiceError as exc:
                self._session.rollback()
                self._logger.error(
                    "Could not connect to Musicbrainz. "
                    "Path: {} Request: {}".format(path, exc))
            self._session.commit()
            Paths.task_done()
    finally:
        self._session.close()
def create_artist_credit(self, acresult, acphrase):
    """Return the ArtistCredit named ``acphrase``, creating it if unknown.

    An existing credit only has its ``ref_count`` incremented. A new one
    gets its ArtistCreditName rows built from ``acresult`` and is added to
    the session together with those rows, starting at ``ref_count`` 1.

    Args:
        acresult: artist-credit list passed on to
            ``create_artist_credit_name``.
        acphrase: the credit's full name, used as the lookup key.

    Returns:
        The existing or newly created ArtistCredit.
    """
    credit = (self._session.query(ArtistCredit)
              .filter_by(name=acphrase)
              .first())
    if credit is not None:
        # Already stored: just count the additional reference.
        credit.ref_count += 1
        return credit

    # Non-nullable attributes.
    credit = ArtistCredit()
    credit.name = acphrase
    credit_names = self.create_artist_credit_name(acresult, credit)
    credit.artist_count = len(credit_names)
    # Nullable attributes.
    credit.ref_count = 1
    self._session.add(credit)
    for credit_name in credit_names:
        self._session.add(credit_name)
    return credit
def create_artist_credit_name(self, acresult, artistcredit):
    """Build the ArtistCreditName rows belonging to one artist credit.

    Args:
        acresult: artist-credit list from the web service; presumably
            artist dicts alternating with join-phrase strings (verify
            against ``utils.pairwise``). An empty string is appended so
            that the last artist also gets a join phrase.
        artistcredit: the ArtistCredit every created row is attached to.

    Returns:
        The created ArtistCreditName objects in credit order. They are
        not added to the session here.
    """
    acns = []
    # A single loop: the header was duplicated, which read as a nested loop.
    for artist, joinphrase in pairwise(acresult + ['']):
        acn = ArtistCreditName()
        # Non-nullable attributes.
        acn.artist_credit = artistcredit
        acn.position = len(acns) + 1  # positions are 1-based
        acn.artist = self.fetch_artist(artist['artist']['id'])
        if 'name' in artist:
            # The credit does not use the artist's standard name.
            acn.name = artist['name']
        else:
            acn.name = artist['artist']['name']
        acn.join_phrase = joinphrase
        acns.append(acn)
    return acns
def fetch_artist(self, mbid):
    """Return the Artist with gid ``mbid``, fetching it when not stored.

    A missing artist is looked up through the fetcher, filled with gid,
    name and sort name, and added to the session.
    """
    stored = self._session.query(Artist).filter_by(gid=mbid).first()
    if stored is not None:
        return stored

    data = fetcher.get_table_by_id(mbid, Table.artist)
    # Non-nullable attributes.
    artist = Artist()
    artist.gid = data['id']
    artist.name = data['name']
    artist.sort_name = data['sort-name']
    self._session.add(artist)
    return artist
def fetch_recording(self, mbid, path):
    """Return the Recording with gid ``mbid``, creating it when not stored.

    A new recording is filled from the fetcher result, linked to ``path``
    and added to the session. For every release in its release list the
    release is fetched and a track is created for each track of that
    release that refers to this recording.

    Args:
        mbid: MusicBrainz id of the recording.
        path: file path stored on the recording; its extension (without
            the dot) becomes ``ftype``.

    Returns:
        The existing or newly created Recording.
    """
    stored = self._session.query(Recording).filter_by(gid=mbid).first()
    if stored is not None:
        return stored

    data = fetcher.get_table_by_id(mbid, Table.recording)
    # Non-nullable attributes.
    recording = Recording()
    recording.gid = data['id']
    recording.name = data['title']
    recording.path = path
    extension = os.path.splitext(path)[1]
    recording.ftype = extension[1:]
    recording.artist_credit = self.create_artist_credit(
        data['artist-credit'], data['artist-credit-phrase'])
    # Nullable attributes.
    if 'length' in data:
        recording.length = data['length']
    self._session.add(recording)

    # Extended mapping.
    for release_ref in data['release-list']:
        release = self.fetch_release(release_ref['id'])
        # Find the track in the release. This is a workaround that is only
        # efficient because of caching; the MusicBrainz API offers no way
        # to fetch the tracks directly.
        release_data = fetcher.get_table_by_id(
            release_ref['id'], Table.release)
        for medium in release_data['medium-list']:
            for track in medium['track-list']:
                if track['recording']['id'] == mbid:
                    self.create_track(track, release, recording, medium)
    return recording
def fetch_release(self, mbid):
    """Return the Release with gid ``mbid``, creating it when not stored.

    A new release gets its name, artist credit, release group and quality
    from the fetcher result, is added to the session, and then has its
    media created from the result's medium list.
    """
    stored = self._session.query(Release).filter_by(gid=mbid).first()
    if stored is not None:
        return stored

    data = fetcher.get_table_by_id(mbid, Table.release)
    # Non-nullable attributes.
    release = Release()
    release.gid = data['id']
    release.name = data['title']
    release.artist_credit = self.create_artist_credit(
        data['artist-credit'], data['artist-credit-phrase'])
    group_id = data['release-group']['id']
    release.release_group = self.fetch_release_group(group_id)
    # Nullable attributes.
    release.quality = self.create_quality(data['quality'])
    self._session.add(release)
    # Extended mapping.
    self.create_medium(data['medium-list'], release)
    return release
def create_quality(self, quality):
    """Translate a quality label into its numeric code.

    Args:
        quality: one of ``'low'``, ``'normal'`` or ``'high'``.

    Returns:
        1, 2 or 3 respectively.

    Raises:
        KeyError: if ``quality`` is any other value.
    """
    codes = dict(low=1, normal=2, high=3)
    return codes[quality]
def create_medium(self, mediumdata, release):
mediums = []
for med in mediumdata:
......@@ -87,8 +174,9 @@ class Collector(threading.Thread):
medium.release = release
medium.position = med['position']
medium.track_count = len(med['track-list'])
if 'format' in med:
medium.format = self.create_medium_format(med['format'])
# TODO report in bug, this requires an id
# if 'format' in med:
# medium.format = self.create_medium_format(med['format'])
mediums.append(medium)
self._session.add(medium)
......@@ -111,8 +199,9 @@ class Collector(threading.Thread):
track.number = trackdata['number']
track.recording = recording
track.name = recording.name
track.medium = self._session.query(Medium).filter(and_(Medium.position == mediumdata['position'],
Medium.release == release)).one()
track.medium = self._session.query(Medium).filter(
and_(Medium.position == mediumdata['position'],
Medium.release == release)).one()
track.artist_credit = recording.artist_credit
track.artist_credit.ref_count += 1
if 'length' in trackdata:
......@@ -120,82 +209,15 @@ class Collector(threading.Thread):
self._session.add(track)
return track
def fetch_recording(self, mbid, path):
    """Return the Recording with gid ``mbid``, creating it when not stored.

    A new recording is filled from the fetcher result, linked to ``path``
    and added to the session. For every release in its release list the
    release is fetched and a track is created for each track of that
    release that refers to this recording.

    Args:
        mbid: MusicBrainz id of the recording.
        path: file path stored on the recording; its extension (without
            the dot) becomes ``ftype``.

    Returns:
        The existing or newly created Recording.
    """
    recording = self._session.query(Recording).filter_by(gid=mbid).first()
    if recording is None:
        # Tables are addressed through the Table enum, like every other
        # fetcher lookup in this module, instead of bare strings.
        result = fetcher.get_table_by_id(mbid, Table.recording)
        # minimal mapping
        recording = Recording()
        recording.gid = result['id']
        recording.name = result['title']
        recording.path = path
        # splitext gives '' for a file without extension, whereas
        # path.split('.')[-1] returned the whole path in that case.
        recording.ftype = os.path.splitext(path)[1][1:]
        recording.artist_credit = self.create_artist_credit(
            result['artist-credit'], result['artist-credit-phrase'])
        if 'length' in result:
            recording.length = result['length']
        self._session.add(recording)
        # extended mapping
        for releasedata in result['release-list']:
            release = self.fetch_release(releasedata['id'])
            # Find the track in the release. This is a workaround that is
            # only efficient because of caching; the MusicBrainz API
            # offers no way to fetch the tracks directly.
            mediumlist = fetcher.get_table_by_id(
                releasedata['id'], Table.release)['medium-list']
            for medium in mediumlist:
                for track in medium['track-list']:
                    if track['recording']['id'] == mbid:
                        self.create_track(
                            track, release, recording, medium)
    return recording
def fetch_release_group(self, mbid):
    """Return the ReleaseGroup with gid ``mbid``, creating it if missing.

    A new release group gets its gid, name and artist credit from the
    fetcher result and is added to the session.
    """
    rg = self._session.query(ReleaseGroup).filter_by(gid=mbid).first()
    if rg is None:
        # One lookup, keyed by the Table enum; the string-keyed duplicate
        # of this call is dropped.
        result = fetcher.get_table_by_id(mbid, Table.release_group)
        rg = ReleaseGroup()
        rg.gid = result['id']
        rg.name = result['title']
        # Created once: a second call would bump the credit's ref_count
        # a second time for the same release group.
        rg.artist_credit = self.create_artist_credit(
            result['artist-credit'], result['artist-credit-phrase'])
        self._session.add(rg)
    return rg
def fetch_artist(self, mbid):
    """Return the Artist with gid ``mbid``, fetching it when not stored.

    A missing artist is looked up through the fetcher, filled with gid,
    name and sort name, and added to the session.
    """
    artist = self._session.query(Artist).filter_by(gid=mbid).first()
    if artist is None:
        # Addressed through the Table enum rather than the string
        # 'artist', like the other fetcher lookups in this module.
        result = fetcher.get_table_by_id(mbid, Table.artist)
        artist = Artist()
        artist.gid = result['id']
        artist.name = result['name']
        artist.sort_name = result['sort-name']
        # add to db
        self._session.add(artist)
    return artist
def fetch_release(self, mbid):
    """Return the Release with gid ``mbid``, creating it when not stored.

    A new release gets its name, artist credit, release group and quality
    from the fetcher result, is added to the session, and then has its
    media created from the result's medium list.
    """
    release = self._session.query(Release).filter_by(gid=mbid).first()
    if release is None:
        # Addressed through the Table enum rather than the string
        # 'release', like the other fetcher lookups in this module.
        result = fetcher.get_table_by_id(mbid, Table.release)
        # minimal mapping
        release = Release()
        release.gid = result['id']
        release.name = result['title']
        release.artist_credit = self.create_artist_credit(
            result['artist-credit'], result['artist-credit-phrase'])
        release.release_group = self.fetch_release_group(
            result['release-group']['id'])
        # Set the quality too, as the other fetch_release definition does.
        release.quality = self.create_quality(result['quality'])
        self._session.add(release)
        # extended mapping
        self.create_medium(result['medium-list'], release)
    return release
......@@ -3,38 +3,64 @@ import logging
import musicbrainzngs
import time
import threading
import enum
import settings
"""
Fetches the musicdata and caches them.
"""
_logger = logging.getLogger('collector.fetcher')
@enum.unique
class Table(enum.Enum):
    """Entity kinds that can be requested from the fetcher."""

    recording = 0
    release_group = 1
    artist = 2
    release = 3
# Identify this client to the MusicBrainz web service.
musicbrainzngs.set_useragent("brainzfs", "0.1-alpha",
                             "https://git.finf.uni-hannover.de/Chrysops/brainzfs")

_logger = logging.getLogger('collector.fetcher')

_lock = threading.Lock()
# One initially empty cache per Table member. The string-keyed literal that
# used to precede this was overwritten immediately and has been dropped.
_cache = {table: {} for table in Table}
_cachedates = collections.deque()
_time = 0
# NOTE(review): this string-keyed dispatch table is rebound further down in
# this module by a `_methods` dict keyed by Table members, so these lambdas
# are no longer reachable once the module has finished loading.
_methods = {
'recording': lambda mbid: musicbrainzngs.get_recording_by_id(mbid, includes=['releases', 'artists'])['recording'],
'release-group': lambda mbid: musicbrainzngs.get_release_group_by_id(mbid, includes=['artist-credits'])['release-group'],
'artist': lambda mbid: musicbrainzngs.get_artist_by_id(mbid)['artist'],
'release': lambda mbid: musicbrainzngs.get_release_by_id(mbid, includes=['artists',
'media',
'recordings',
'release-groups'])['release']
}
# for debugging
# from offline import CACHE
# _cache = CACHE
def _get_recording(mbid):
    """Look up one recording together with its releases and artists.

    When the result reports more than 25 releases, its ``release-list`` is
    replaced by the releases obtained from browsing by recording. Browsing
    is paginated, so recordings on more than 100 releases no longer lose
    everything past the first page.

    Args:
        mbid: MusicBrainz id of the recording.

    Returns:
        The ``'recording'`` dict of the web-service response.
    """
    lookup_limit = 25   # above this the lookup's release list is replaced
    page_size = 100     # page size requested from browse_releases

    res = musicbrainzngs.get_recording_by_id(
        mbid, includes=['releases', 'artists'])['recording']
    if res['release-count'] > lookup_limit:
        releases = []
        while True:
            page = musicbrainzngs.browse_releases(
                recording=mbid, limit=page_size,
                offset=len(releases))['release-list']
            releases.extend(page)
            # A short (or empty) page means there is nothing left to fetch.
            if len(page) < page_size:
                break
        res['release-list'] = releases
    return res
def _get_release_group(mbid):
    """Look up one release group, including its artist credits."""
    response = musicbrainzngs.get_release_group_by_id(
        mbid, includes=['artist-credits'])
    return response['release-group']
def _get_artist(mbid):
    """Look up one artist and return the ``'artist'`` part of the response."""
    response = musicbrainzngs.get_artist_by_id(mbid)
    return response['artist']
def _get_release(mbid):
    """Look up one release with artists, media, recordings, release groups."""
    wanted = ['artists', 'media', 'recordings', 'release-groups']
    response = musicbrainzngs.get_release_by_id(mbid, includes=wanted)
    return response['release']
# Lookup function used for each Table member.
_methods = {
    Table.recording: _get_recording,
    Table.release_group: _get_release_group,
    Table.artist: _get_artist,
    Table.release: _get_release,
}
def _time_ms():
......
#!/usr/bin/env python3
import fetcher
from utils import p_print
# Debug helper: fetch one fixed recording and pretty-print the raw result.
mbid = '4b442925-9d2a-41e0-b958-4c2739a09d45'
res = fetcher._get_recording(mbid)
p_print(res)
......@@ -37,7 +37,7 @@ class Plumber(threading.Thread):
if action == Action.clean_cache:
fetcher.clean_cache()
if action == Action.clean_database:
pass
self.clean_database()
self._session.close()
def check_recordings(self):
......@@ -57,6 +57,9 @@ class Plumber(threading.Thread):
def clean_recording(self, recording):
    """Placeholder: does nothing with ``recording`` yet."""
    return None
def clean_database(self):
    """Placeholder for the database clean-up action; does nothing yet.

    Declared with ``self`` because it is invoked as
    ``self.clean_database()``; without it that call raises TypeError.
    """
    return None
def check_update_time(self, entity):
print("last_updated:")
print(entity.last_updated)
......
......@@ -409,7 +409,7 @@ class FuseFile(FuseIO, Node):
Node.__init__(self, parent, None, data)
self._gid = gid
self._path = path
self._attr.st_size = os.path.getsize(path)
self._attr.st_size = os.path.getsize(get_path(path))
def __repr__(self):
return ''.join(["FuseFile(mode=", oct(self._attr.st_mode),
......
......@@ -23,7 +23,6 @@ class Walker(threading.Thread):
for file in files:
mbid = None
filepath = os.path.join(root, file)
self.log.debug('Add file to queue: {}'.format(filepath))
try:
mut = mutagen.File(filepath)
except AttributeError:
......@@ -37,6 +36,8 @@ class Walker(threading.Thread):
if mut is not None:
mbid = self.get_mbid(mut)
if mbid is not None and mbid not in self._mbids:
self.log.debug('Add file to queue: '
'{}, mbid: {}'.format(filepath, mbid))
Paths.put((mbid,
os.path.relpath(filepath,
settings.directory_prefix)))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment