Commit cebf0090 authored by Gerion Entrup's avatar Gerion Entrup
Browse files

Merge branch 'newtranslator'

parents e0605a93 7d13cede
*.pyc
__pycache__
music.db
.ropeproject
......@@ -2,7 +2,6 @@
import argparse
import sys
import os.path
import logging
#workaround for sqlite pseudo concurrency
......@@ -16,41 +15,56 @@ from sqlalchemy.pool import StaticPool
from collector import Collector
from translator import Translator
from walker import Walker
from mbdata.models import Base, Recording
from mbdata.models import Base
from mbdata import patch_model_schemas, NO_SCHEMAS
class Main:
"""Main class, that starts all other parts of the program"""
def init_database(self, dbfile):
def _init_database(self, dbfile):
"""Initialize the database. Currently only sqlite
Arguments:
dbfile -- the path of the database
"""
database = 'sqlite:///'
engine = create_engine(database + dbfile,
connect_args={'check_same_thread':False},
#echo = True,
poolclass=StaticPool)
connect_args={'check_same_thread':False},
#echo = True,
poolclass=StaticPool)
patch_model_schemas(NO_SCHEMAS)
Base.metadata.create_all(engine)
session_factory = sessionmaker(bind=engine)
Session = scoped_session(session_factory)
session = scoped_session(session_factory)
if database == 'sqlite:///':
#dirty workaround for sqlite non concurrent mode in python
#remove it once python supports fully concurrency in sqlite
self.session = sqlitequeue.get_Session(Session)
self.session = sqlitequeue.get_Session(session)
else:
self.session = Session
self.session = session
def main(self, args):
version="0.1-alpha"
parser = argparse.ArgumentParser(description="Create a directory structure of music with symlinks based on musicbrainz data", add_help=True)
parser.add_argument('--version', '-V', action='version', version='%(prog)s {0}'.format(version))
"""Main method. Parses cmdline arguments and starts the program."""
version = "0.1-alpha"
parser = argparse.ArgumentParser(add_help=True,
description="Create a directory structure of music with" \
"symlinks based on musicbrainz data")
parser.add_argument('--version', '-V', action='version',
version='%(prog)s {0}'.format(version))
parser.add_argument('sourcedir', help="filepath of source file directory")
parser.add_argument('mountpoint', help="where it should be mounted")
parser.add_argument('--verbose', '-v', action="store_true", default=False, help="lookup files with musicbrainz (better results, but takes a long time")
parser.add_argument('--verbose', '-v', action="store_true", default=False,
help="change to loglevel to info")
parser.add_argument('--database', '-d', default="music.db", help="path of the database")
parser.add_argument('--logfile', '-l', help="log into a logfile")
arg = parser.parse_args(args[1:])
logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(levelname)-5s %(name)-18s %(message)s',
datefmt='%y-%m-%d %H:%M')
logger = logging.getLogger('main')
self.init_database(arg.database)
self._init_database(arg.database)
logger.info("database initialized")
collector = Collector(self.session)
......@@ -59,6 +73,9 @@ class Main:
walker = Walker(self.session, arg.sourcedir)
walker.start()
#from mbdata.models import Recording, Release, ReleaseGroup, Track, Artist, ArtistCredit, ArtistCreditName, Medium
#from sqlalchemy.sql import select
#se = self.session()
#from utils import debug_breakpoint
#debug_breakpoint()
......@@ -69,8 +86,4 @@ class Main:
translator.join()
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(levelname)-5s %(name)-18s %(message)s',
datefmt='%y-%m-%d %H:%M')
logger = logging.getLogger('main')
Main().main(sys.argv)
......@@ -9,12 +9,16 @@ import logging
from sqlalchemy import and_
from sqlalchemy.orm import Session
from musicbrainzngs import WebServiceError
from collections import namedtuple
from mbdata.models import Recording, ArtistCredit, ArtistCreditName, Artist, Release, ReleaseGroup, Medium, Track, MediumFormat
from utils import pairwise
from fetcher import Fetcher
# Shared work queue between producers (Walker/Plumber) and the Collector
# thread; bounded so producers block instead of growing memory without limit.
Paths = queue.Queue(maxsize=10)
# Action tags for items placed on Paths: (action, payload) tuples.
NEW = 0; DEFECT = 1; UPDATE = 2
# Payload for NEW: the MusicBrainz id and the path of the file to add.
NewData = namedtuple('NewData', ['mbid', 'path'])
# Payload for DEFECT: the MusicBrainz id and the table of the broken row.
DefectData = namedtuple('DefectData', ['mbid', 'table'])
class Collector(threading.Thread):
"""
......@@ -29,10 +33,19 @@ class Collector(threading.Thread):
def run(self):
while True:
mbid, path = Paths.get()
self.log.info("Working on file: {}".format(path))
self.fetch_recording(mbid, path)
self._session.commit()
action, data = Paths.get()
if action == NEW:
self.log.info("Adding file {} to the database.".format(data.path))
self.fetch_recording(data.mbid, data.path)
self._session.commit()
elif action == DEFECT:
self.log.info("Repair broken entry with mbid {}".format(data))
#TODO
elif action == UPDATE:
self.log.info("Update outdated entry with mbid {}".format(data))
#TODO
else:
self.log.error("Unknow action: {}".format(action))
Paths.task_done()
self._session.close()
......@@ -43,10 +56,13 @@ class Collector(threading.Thread):
ac.name = acphrase
acns = self.create_artist_credit_name(acresult, ac)
ac.artist_count = len(acns)
ac.ref_count = 1
self._session.add(ac)
for acn in acns:
self._session.add(acn)
else:
ac.ref_count += 1
return ac
def create_artist_credit_name(self, acresult, artistcredit):
......@@ -89,16 +105,18 @@ class Collector(threading.Thread):
if track is None:
track = Track()
track.gid = trackdata['id']
track.position = trackdata['position']
track.number = trackdata['number']
track.recording = recording
track.name = recording.name
track.medium = self._session.query(Medium).filter(and_(Medium.position == mediumdata['position'],
Medium.release == release)).one()
track.artist_credit = recording.artist_credit
track.position = trackdata['position']
track.number = trackdata['number']
track.name = recording.name
track.artist_credit.ref_count += 1
if 'length' in trackdata:
track.length = trackdata['length']
self._session.add(track)
return track
def fetch_recording(self, mbid, path):
recording = self._session.query(Recording).filter_by(gid=mbid).first()
......
import llfuse
from hlfuse.model import *
from hlfuse.operations import Operations
def init(mountpoint, rootnode, options=[]):
    """Mount the filesystem at *mountpoint* and run the llfuse main loop.

    Blocks until the filesystem is unmounted or llfuse.main raises.

    Arguments:
    mountpoint -- directory where the filesystem gets mounted
    rootnode -- node served as the filesystem root
    options -- extra mount options passed through to llfuse.init
    """
    opfs = Operations(rootnode)
    llfuse.init(opfs, mountpoint, options)
    try:
        # single=True: run the request loop single-threaded
        llfuse.main(single=True)
    except:
        # Tear llfuse down before re-raising; unmount=False leaves the
        # (possibly still busy) mountpoint for the OS/user to clean up.
        llfuse.close(unmount=False)
        raise
import llfuse
import time
import os
import stat
class FuseIO():
    """Base class for in-memory FUSE nodes; owns the llfuse attribute struct.

    The inode number is the Python object's id(), so it is unique per live
    object for the lifetime of the process.
    """

    def __init__(self, mode):
        """Arguments:
        mode -- full st_mode value (file type bits | permission bits)
        """
        self._attr = llfuse.EntryAttributes()
        self._attr.st_mode = mode
        self._attr.st_nlink = 1
        self._attr.st_size = 0
        self._attr.st_ino = id(self)
        self._attr.generation = 0
        self._attr.st_rdev = 0
        # How long (seconds) the kernel may cache attributes/entries.
        self._attr.attr_timeout = 300
        self._attr.entry_timeout = 300
        # Sample the clock once so atime/mtime/ctime are guaranteed equal;
        # three separate time.time() calls could straddle a second boundary.
        now = int(time.time())
        self._attr.st_atime = now
        self._attr.st_mtime = now
        self._attr.st_ctime = now
        self._attr.st_blksize = 512
        self._attr.st_blocks = 1
        self._attr.st_gid = os.getgid()
        self._attr.st_uid = os.getuid()

    def add_nlink(self):
        """Bump the link count (a parent directory now references this node)."""
        self._attr.st_nlink += 1

    def get_inode(self):
        """Return this node's inode number (set once at construction)."""
        return self._attr.st_ino

    def get_attr(self):
        """Return the llfuse.EntryAttributes struct for this node."""
        return self._attr
class FuseFile(FuseIO):
    """A read-only regular file whose whole body lives in memory."""

    def __init__(self, mode):
        super().__init__(stat.S_IFREG | mode)
        self._content = b''
        self._pos = 0

    def set_content(self, content):
        """Replace the file body and keep st_size consistent with it."""
        self._content = content
        self._attr.st_size = len(content)

    def open(self):
        """Hook for subclasses; a plain in-memory file needs no open step."""
        pass

    def close(self):
        """Hook for subclasses; a plain in-memory file needs no close step."""
        pass

    def seek(self, offset):
        """Remember the position the next read() starts from."""
        self._pos = offset

    def read(self, size):
        """Return up to *size* bytes from the current position."""
        start = self._pos
        return self._content[start:start + size]
class FuseDir(FuseIO):
    """An in-memory directory mapping entry names to FuseIO nodes."""

    def __init__(self, mode, root=False):
        super().__init__(stat.S_IFDIR | mode)
        self._files = {}
        self._attr.st_size = 4096
        if root:
            # The root gets the well-known inode and the classic "." + ".."
            # starting link count of 2.
            self._attr.st_ino = llfuse.ROOT_INODE
            self._attr.st_nlink = 2

    def get_files(self):
        """Return (name, node) pairs for every entry of this directory."""
        return self._files.items()

    def add_file(self, filename, file):
        """Insert *file* under *filename* and maintain the link counts."""
        file.add_nlink()
        self._files[filename] = file
        if isinstance(file, FuseDir):
            # A subdirectory's ".." adds one link to this directory.
            self._attr.st_nlink += 1
import llfuse, errno
import sys
# Filesystem names arrive as bytes; convert via the filesystem encoding,
# using surrogateescape so undecodable bytes survive a round trip.
fse = sys.getfilesystemencoding()


def bytes2str(s):
    """Decode filesystem bytes *s* to str without losing odd bytes."""
    return s.decode(fse, errors='surrogateescape')


def str2bytes(s):
    """Encode *s* back to filesystem bytes, restoring escaped surrogates."""
    return s.encode(fse, errors='surrogateescape')
class Operations(llfuse.Operations):
    """Read-only llfuse request handlers backed by an in-memory node tree.

    Inodes double as file/directory handles; nodes become reachable by inode
    once lookup() has cached them.
    """

    def __init__(self, root):
        # Bug fix: super(llfuse.Operations, self).__init__() starts the MRO
        # lookup *after* llfuse.Operations, skipping its __init__ entirely.
        # super() runs the base-class initializer as intended.
        super().__init__()
        self._cache = {llfuse.ROOT_INODE: root}

    def getattr(self, inode):
        """Return the attributes of the node behind *inode*."""
        node = self._get_file(inode)
        return node.get_attr()

    def lookup(self, parent_inode, name):
        """Resolve the entry *name* (bytes) inside directory *parent_inode*."""
        for filename, node in self._get_file(parent_inode).get_files():
            if filename == bytes2str(name):
                # Cache the child so later per-inode requests can find it.
                self._fill_in_file(node)
                return node.get_attr()
        raise llfuse.FUSEError(errno.ENOENT)

    def opendir(self, inode):
        """Verify the directory exists; the inode doubles as the handle."""
        # NOTE(review): _get_file raises FUSEError rather than returning
        # None, so this check is effectively a presence probe.
        if self._get_file(inode) is None:
            raise llfuse.FUSEError(errno.ENOENT)
        return inode

    def readdir(self, fh, off):
        """Yield (name, attrs, next_offset) for entries starting at *off*."""
        entries = list(self._get_file(fh).get_files())
        for i in range(off, len(entries)):
            yield (str2bytes(entries[i][0]), entries[i][1].get_attr(), i + 1)

    def open(self, inode, flags):
        """Open the file node; the inode doubles as the file handle."""
        self._get_file(inode).open()
        return inode

    def flush(self, fh):
        """Forward close() of the handle to the node's close hook."""
        self._get_file(fh).close()

    def read(self, fh, off, size):
        """Read *size* bytes at offset *off* from the node behind *fh*."""
        node = self._get_file(fh)
        node.seek(off)
        return node.read(size)

    def _get_file(self, inode):
        # Raise ENOENT for inodes never cached via the constructor or lookup().
        if inode in self._cache:
            return self._cache[inode]
        raise llfuse.FUSEError(errno.ENOENT)

    def _fill_in_file(self, file):
        # Make the node addressable by its inode for subsequent requests.
        self._cache[file.get_inode()] = file
import os.path
from collector import Paths, DefectData, DEFECT, UPDATE
class Plumber(threading.Thread):
    """Background thread that checks the database and schedules repairs."""

    def __init__(self, session_fac):
        """Arguments:
        session_fac -- factory returning a new database session
        """
        # NOTE(review): target=self is redundant -- run() is overridden, so
        # Thread never calls the target.
        super().__init__(target=self)
        self._session = session_fac()
        self.log = logging.getLogger('plumber')

    def run(self):
        """Run every per-table consistency check once."""
        # Bug fix: these are methods of this class; the previous bare names
        # (check_recordings() etc.) raised NameError at runtime.
        self.check_recordings()
        self.check_artist_credits()
        self.check_artist_credit_names()
        self.check_artists()
        self.check_release_groups()
        self.check_releases()
        self.check_tracks()
        self.check_mediums()

    def check_entitys(self, table, check_fail, check_defect=lambda x: False):
        """Generic sweep over *table*.

        Arguments:
        table -- mapped model class to scan
        check_fail -- predicate; True means the row is beyond repair (delete)
        check_defect -- predicate; True means the row can be repaired
        """
        for entity in self._session.query(table).all():
            # clean file if necessary (bad path or invalid MBID)
            if check_fail(entity) or (hasattr(entity, 'gid') and mbid_error(entity.gid)):
                self._session.delete(entity)
            # check for defect and hand it to the Collector queue
            if check_defect(entity):
                Paths.put((DEFECT, DefectData(mbid=entity.gid, table=table)))
            check_update_time(entity)

    def check_recordings(self):
        """Drop recordings whose file is gone; flag missing artist credits."""
        self.check_entitys(Recording,
                           lambda x: not os.path.isfile(x.path),
                           check_defect=lambda x: x.artist_credit is None)

    def check_artist_credits(self):
        """Drop artist credits that no recording references anymore."""
        self.check_entitys(ArtistCredit,
                           lambda ac: self._session.query(Recording).filter(ac == Recording.artist_credit).first() is None)

    def check_artist_credit_names(self):
        pass  # TODO: not implemented yet

    def check_artists(self):
        pass  # TODO: not implemented yet

    def check_release_groups(self):
        pass  # TODO: not implemented yet

    def check_releases(self):
        pass  # TODO: not implemented yet

    def check_tracks(self):
        pass  # TODO: not implemented yet

    def check_mediums(self):
        pass  # TODO: not implemented yet
def mbid_error(mbid):
    """Report whether *mbid* is a known-bad MusicBrainz id.

    Placeholder: no validation is implemented yet, so every id passes.
    """
    # TODO(review): implement a real MBID validity/lookup check.
    return False
def check_update_time(entity):
    """Debug helper: dump an entity's last_updated value and its type."""
    value = entity.last_updated
    print("last_updated:")
    print(value)
    print(type(value))
# Default permission bits for synthesized directories (r-x for everyone).
default_mode_dir = 0o555
# Default permission bits for synthesized files (read-only for everyone).
default_mode_file = 0o444
import sqlfuse.data
import sqlfuse.operations
from sqlfuse.path import FPath, QPath
from sqlfuse.where import Where
from sqlfuse.functions import Functions
__all__ = ['FPath', 'QPath', 'Where', 'Functions']
def init(session, fs):
    """Wire the data layer to *session*/*fs* and return the llfuse operations.

    Arguments:
    session -- database session used to answer queries
    fs -- iterable of path descriptions building the virtual tree
    """
    data_layer = sqlfuse.data.Data(session, fs)
    return sqlfuse.operations.Operations(data_layer)
import llfuse
from sqlfuse.model import FuseDir, Special
import logging
class Data:
    """Builds the FUSE node tree from query paths and caches nodes by inode."""

    def __init__(self, session, fs):
        """Arguments:
        session -- database session handed to query-backed nodes
        fs -- iterable of path objects describing the virtual tree
        """
        self.logger = logging.getLogger('sqlfuse.data')
        # Pad the cache so that list index == inode number; real inodes
        # start at llfuse.ROOT_INODE.
        self._cache = llfuse.ROOT_INODE * [None]
        self._session = session
        self._tree = FuseDir(None, llfuse.ROOT_INODE, None, "/", self,
                             root=True)
        self._cache.append(self._tree)
        self._create_tree(fs)
        self.logger.debug("created tree")
        self.logger.debug(self.print_tree())

    def _create_tree(self, fs):
        """Expand the flat path descriptions *fs* into nested FuseDir nodes."""
        dtree = [{}, None]
        # build tree as dict; every entry is [children-dict, qpath-or-None]
        for cpath in fs:
            path = cpath.path.split('/')
            t = dtree
            if path[0] == '':
                path = path[1:]
            for f in path:
                if f not in t[0]:
                    t[0][f] = [{}, None]
                t = t[0][f]
            t[1] = cpath
        # build real objects; the queue grows while iterating (breadth-first)
        queue = [(dtree[0], self._tree)]
        for (dnode, node) in queue:
            for filename in dnode:
                (subdnode, qpath) = dnode[filename]
                nnode = self._create_tree_node(node, qpath, filename)
                if len(subdnode) > 0:
                    queue.append((subdnode, nnode))

    def _create_tree_node(self, node, qpath, filename):
        """Create one child of *node*: query-backed special or plain dir."""
        # if self defined
        if qpath is not None:
            if qpath.fuse_io:
                ndir = node.add_fuseio(
                    qpath.get(node, self._session, filename))
            else:
                ndir = node.add_special(
                    Special(qpath.query, qpath.functions, qpath.where,
                            self._session, node, filename, self,
                            qpath.fileattr))
        else:
            ndir = FuseDir(None, -1, node, filename, self)
            if not node.is_special():
                self.pin_inode(ndir)
            node.add_fuseio(ndir, filename)
        return ndir

    def get_file(result):
        # NOTE(review): stub -- first parameter is 'result', not 'self';
        # confirm the intended signature before implementing.
        pass

    def get(self, inode):
        """Return the cached node for *inode*, or None when it is unknown."""
        # Bug fix: _cache is a list, so an out-of-range inode raises
        # IndexError; the old `except KeyError` never matched and the error
        # escaped to the caller instead of returning None.
        try:
            return self._cache[inode]
        except IndexError:
            return None

    def pin_inode(self, fuseio):
        """Assign the next free inode number to *fuseio* and cache it."""
        ino = len(self._cache)
        fuseio.set_inode(ino)
        self._cache.append(fuseio)

    def print_tree(self):
        """Render the node tree as an indented multi-line string (debug aid)."""
        res = ''
        queue = [('/', self._tree, 0)]
        while len(queue) > 0:
            (name, elem, indent) = queue.pop()
            res += ''.join([' ' * indent, name, ': ', repr(elem), '\n'])
            for (name, child) in elem.get_children().items():
                queue.append((name, child, indent + 2))
            for special in elem.get_specials():
                queue.append(
                    ('special', special, indent + 2))
        return res
import re
import logging
from sqlalchemy.orm.attributes import InstrumentedAttribute
class Functions:
    """Generates filenames from DB rows and parses names back into filters."""

    def __init__(self, *args):
        """Build the name template and its matching regex.

        Arguments:
        args -- mix of literal strings and SQLAlchemy InstrumentedAttribute
                columns; each column becomes a named capture group.
        """
        self.logger = logging.getLogger('sqlfuse.functions')
        # Splits CamelCase for _flatten(): insert "_" before inner capitals.
        self._fltt_re = re.compile('([^A-Z])([A-Z])')
        self._name = []
        self._name_regex = '^'
        self._regex_groups = []
        for arg in args:
            if isinstance(arg, str):
                self._name_regex += re.escape(arg)
                self._name.append((True, arg))
            elif isinstance(arg, InstrumentedAttribute):
                attr_name = self._flatten(arg)
                self._name_regex += '(?P<{}>.*)'.format(attr_name)
                self._regex_groups.append((arg, attr_name))
                self._name.append((False, attr_name))
            else:
                raise Exception("Type must be str or InstrumentedAttribute")
        self._name_regex = re.compile(self._name_regex + '$')
        self.logger.debug(
            "generated regex: {}".format(self._name_regex.pattern))

    @classmethod
    def construct_with_functions(cls, gen_name, parse_name):
        """Alternate constructor with user-supplied gen/parse callables."""
        n_cls = cls()
        n_cls.gen_name = gen_name
        n_cls.parse_name = parse_name
        return n_cls

    def _flatten(self, attr):
        # "Table.column" -> "table_column"; CamelCase gets underscore-split
        # so the result is a valid regex group name.
        return self._fltt_re.sub(r'\1_\2', str(attr).replace('.', '_')).lower()

    def gen_name(self, row):
        """Assemble the filename for *row* from the stored template."""
        name = []
        for (is_str, elem) in self._name:
            if is_str:
                name.append(elem)
            else:
                try:
                    name.append(getattr(row, elem))
                except AttributeError:
                    raise Exception('Cannot access ' + elem +
                                    '. Row: ' + str(row) +
                                    'Maybe your select miss a column.')
        return ''.join(name)

    def parse_name(self, name, query):
        """Match *name* against the template; return the filtered query or None."""
        # Bug fix: pass *name* as a lazy %-style argument. The old call
        # logger.debug("parse name: ", name) supplied it as a format arg for
        # a format string without a placeholder, so it was never rendered.
        self.logger.debug("parse name: %s", name)
        res = self._name_regex.match(name)
        if res is None:
            return None
        for (attr, group_name) in self._regex_groups:
            query = query.where(attr == res.group(group_name))
        return query
import llfuse
import time
import os
import stat
import copy
import logging
import settings
class FuseIO():
def __init__(self, mode, inode, attr=None):
    """Store the inode and adopt *attr* if given, else build fresh attributes.

    Arguments:
    mode -- st_mode used only when no *attr* is supplied
    inode -- inode number; -1 marks a special (unpinned) node
    attr -- pre-built attribute struct to reuse, or None
    """
    self._inode = inode
    self._attr = attr if attr else self._construct_attr(mode)
def _construct_attr(self, mode):
    """Build fresh llfuse EntryAttributes for a node with st_mode *mode*.

    Arguments:
    mode -- full st_mode value (file type bits | permission bits)
    """
    attr = llfuse.EntryAttributes()
    attr.st_mode = mode
    attr.st_nlink = 1
    attr.st_size = 0
    # attr.generation = 0
    # attr.st_rdev = 0
    # attr.attr_timeout = 300
    # attr.entry_timeout = 300
    # Bug fix: the *_ns fields are nanoseconds, but int(time.time()) yields
    # seconds, which put every timestamp near the epoch. Sample the clock
    # once so atime/mtime/ctime are also guaranteed identical.
    now_ns = time.time_ns()
    attr.st_atime_ns = now_ns
    attr.st_mtime_ns = now_ns
    attr.st_ctime_ns = now_ns
    # attr.st_blksize = 512
    # attr.st_blocks = 1
    attr.st_gid = os.getgid()
    attr.st_uid = os.getuid()
    return attr
def get_inode(self):
    """Return the inode number; special nodes (-1) have none to give."""
    if self._inode != -1:
        return self._inode
    raise Exception("Special File")
def<