Commit 40283c93 authored by Gerion Entrup's avatar Gerion Entrup
Browse files

retrieval: add documentation

parent 1b0047ab
"""Abstraction of the retrieval and database function needed for adding new
entries to the database.
This module takes a structure of operations and then handles all the database
and web retrieval tasks in a generic way.
It was written mainly to avoid doubling code and provide a clean interface.
The structure has to be a dict with an entry of Table as key and an Entity
as value. See the documentation of Entity for further information.
To use the module, the first call must be init() with the structure and
a session class (not an instance). Then several create() calls can be made.
The last action is commit(), to commit all to the database.
"""
from retrieval.fetcher import get_table_by_id
from retrieval.entity import Entity, Filter, Table
from retrieval.helper import map_quality, fake_id
......@@ -11,12 +26,32 @@ _retrieval = False
def init(structure, session_fac):
    """Set up the module with a structure and a SQLAlchemy session class.

    This function has to be called before anything else in the module is
    used. While the module-level retrieval object is already set (truthy),
    further calls leave it untouched.

    Arguments:
    structure -- dict of operations, see the documentation of the module
                 for details of the format
    session_fac -- a SQLAlchemy Session class (not an instance)
    """
    global _retrieval
    if _retrieval:
        # Already initialized: keep the existing Retrieval object.
        return
    _retrieval = Retrieval(structure, session_fac)
def create(table, *args):
"""Create a table object (row in the database).
Arguments:
table -- The type of the table entry that should be created. Has to be of
type Table.
*args -- All arguments necessary for the specific table.
This function return a valid table object. If the object is in the cache
or in the database, this would be returned, otherwise it creates a new one.
"""
global _retrieval
if not _retrieval:
raise("You have to call init before using create.")
......@@ -24,6 +59,7 @@ def create(table, *args):
def commit():
"""Add all objects to the session and commit it to the database."""
global _retrieval
if not _retrieval:
raise("You have to call init before using create.")
......
......@@ -4,24 +4,67 @@ from collections import namedtuple
def dummy(*args, **kwargs):
    """Accept any positional and keyword arguments and do nothing."""
    return None
# Plain namedtuple that carries the fields; the documented Entity class
# below derives from it.
Entity_ = namedtuple('Entity', ['parameter', 'mb_class', 'query_filter',
                                'web', 'mapping', 'reverse_mapping'])
# Set the reverse_mapping default to dummy and thereby make it optional.
Entity_.__new__.__defaults__ = (dummy, )


class Entity(Entity_):
    """Collection of data and functions necessary for retrieval and adding
    of an object to the database.

    Attributes:
    parameter       -- tuple of strings that name the arguments passed
                       later.
    mb_class        -- The ORM Table class that is worked with.
    query_filter    -- The Filter object necessary for the checking SQL
                       query.
    web             -- The name of the ID attribute for the web query.
                       Must be present in parameter, too.
    mapping         -- A function that gets an ORM object, a web query
                       result (if one exists) and the given parameters as
                       arguments. It is expected that the function then
                       maps the result entries and parameters to the
                       object.
    reverse_mapping -- A function with the same arguments as mapping, but
                       it gets the existing object instead of a newly
                       constructed one. This function is optional and
                       does nothing by default.
    """

    # Without empty __slots__ every instance of a namedtuple subclass gets
    # its own __dict__; keep instances as lightweight as the plain tuple.
    __slots__ = ()
class Filter:
    """Representation of a SQLAlchemy filter expression. Used to "cache"
    the filter expression before the real query is executed.

    The class takes every key/value combination as keyword arguments.
    These arguments are forwarded to the filter expression: the key in its
    original form, the value replaced with the relevant parameter value,
    which can be specified with Entity.parameter.

    Example:
        Entity.parameter = ('value',)
        Entity.mb_class = Foo
        Filter(key='value')
    would lead to
        session.query(Foo).filter_by(key=<value>)
    where <value> is replaced with the appropriate parameter value.
    """

    def __init__(self, **kwargs):
        # Keep the keyword arguments until the query is actually built.
        self._kwargs = dict(kwargs)

    def get_kwargs(self):
        """Return a copy of the keyword arguments that represent the
        filter, so callers cannot modify the stored ones.
        """
        return dict(self._kwargs)
class Table(enum.Enum):
"""All supported tables."""
artist = 1
artist_credit = 2
artist_credit_name = 3
......
......@@ -8,8 +8,8 @@ import settings
from retrieval.entity import Table
"""
Fetches the musicdata and caches them.
""" Fetches the musicdata and caches them. Could only exist once.
Use it with get_table_by_id() and clean_cache().
"""
......@@ -30,6 +30,7 @@ _time = 0
def _get_recording(mbid):
"""Retrieval function for recording."""
res = musicbrainzngs.get_recording_by_id(
mbid, includes=['releases', 'artists'])['recording']
if res['release-count'] > 25:
......@@ -39,15 +40,18 @@ def _get_recording(mbid):
def _get_release_group(mbid):
    """Retrieval function for release_group.

    Requests the release group with the given id, including its artist
    credits, and returns the 'release-group' entry of the response.
    """
    response = musicbrainzngs.get_release_group_by_id(
        mbid, includes=['artist-credits'])
    return response['release-group']
def _get_artist(mbid):
    """Retrieval function for artist.

    Requests the artist with the given id and returns the 'artist' entry
    of the response.
    """
    response = musicbrainzngs.get_artist_by_id(mbid)
    return response['artist']
def _get_release(mbid):
"""Retrieval function for release."""
return musicbrainzngs.get_release_by_id(
mbid, includes=['artists',
'media',
......@@ -61,10 +65,12 @@ _methods = {Table.recording: _get_recording,
def _time_ms():
    """Return the current time as an int in milliseconds."""
    millis = time.time() * 1000
    return int(millis)
def _delay():
"""Honor the MusicBrainz accessing delay. Sleep as long as necessary."""
global _time
diff = _time_ms() - _time
if (diff < 1000):
......@@ -73,6 +79,9 @@ def _delay():
def _cache_append(tablename, mbid, result):
"""Append data to the cache. If the cache limit is reached, delete
the oldest entry.
"""
_cache[tablename][mbid] = result
_cachedates.append((_time_ms, tablename, mbid))
if len(_cachedates) > settings.fetcher_cache_length:
......@@ -81,6 +90,11 @@ def _cache_append(tablename, mbid, result):
def clean_cache():
"""Clean all outdated entries out of the cache. This function can be
called everytime.
An outdated entry is an entry older than settings.fetcher_cache_age.
"""
with _lock:
threshold = _time_ms - settings.fetcher_cache_age * 1000
d_time, tablename, mbid = _cachedates[0]
......@@ -91,10 +105,13 @@ def clean_cache():
def get_table_by_id(mbid, tablename):
"""fetch the data from musicbrainz. Asked the cache in transparent
way, if data already available and handles the musicbrainz timing
rules. Throws an musicbrainzngs.WebServiceError if anything goes
wrong.
"""Fetch the data from MusicBrainz. Ask the cache in transparent way, if
data already available and handle the musicbrainz timing rules. Throw a
musicbrainzngs.WebServiceError if anything goes wrong.
Arguments:
mbid -- The id that should be fetched.
tablename -- An entry of retrieval.Table to specify the table.
"""
if tablename not in web:
raise("Error: no Web lookup possible for " + str(tablename))
......
......@@ -2,6 +2,9 @@ import hashlib
def map_quality(value):
"""Map a MusicBrainz entry quality word to a number. This is required by
the database schema.
"""
qualities = {'low': 1,
'normal': 2,
'high': 3}
......@@ -9,5 +12,12 @@ def map_quality(value):
def fake_id(name, value):
    """Build a fake UUID from the SHA-1 sum of name concatenated with
    value and return it.

    Arguments:
    name -- arbitrary string
    value -- arbitrary string
    """
    digest = hashlib.sha1((name + value).encode('utf-8')).hexdigest()
    # Cut the first 32 hex digits into the 8-4-4-4-12 UUID grouping.
    bounds = (0, 8, 12, 16, 20, 32)
    return '-'.join(digest[lo:hi] for lo, hi in zip(bounds, bounds[1:]))
......@@ -7,7 +7,11 @@ from contextlib import contextmanager
@contextmanager
def session_scope(Session):
"""Provide a transactional scope around a series of operations."""
"""Provide a transactional scope around a series of operations.
Arguments:
Session -- a SQLAlchemy session class (not an instance)
"""
session = Session()
try:
yield session
......@@ -21,6 +25,7 @@ def session_scope(Session):
class Retrieval():
def __init__(self, structure, Session):
"""See retrieval.init"""
self._logger = logging.getLogger('retrieval')
self._Session = Session
self._structure = structure
......@@ -29,6 +34,9 @@ class Retrieval():
self._cache[key] = {}
def _check_state(self, objects):
"""Check if foreign key objects already in database. If not the case a
query would be meaningless.
"""
state_ok = False
for obj in objects:
try:
......@@ -40,6 +48,7 @@ class Retrieval():
return state_ok
def create(self, table, *args):
"""See retrieval.create"""
entity = self._structure[table]
assert(len(args) == len(entity.parameter))
# konstruct keys
......@@ -74,6 +83,7 @@ class Retrieval():
return obj
def commit(self):
"""See retrieval.commit"""
with session_scope(self._Session) as session:
for table in self._cache.values():
for entity in table.values():
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment