Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Gerion Entrup
brainzfs
Commits
1bba597d
Commit
1bba597d
authored
Sep 23, 2016
by
Gerion Entrup
Browse files
retrieval: replace retrieval.Table with the ORM objects of mbdata
parent
40283c93
Changes
5
Hide whitespace changes
Inline
Side-by-side
collector.py
View file @
1bba597d
...
...
@@ -7,7 +7,7 @@ import musicbrainzngs
from
mbdata.models
import
ArtistCredit
,
ArtistCreditName
,
Artist
,
Release
,
ReleaseGroup
,
Medium
,
Track
,
MediumFormat
from
retrieval
import
Entity
,
Filter
,
Table
from
retrieval
import
Entity
,
Filter
from
model
import
Recording
from
utils
import
pairwise
...
...
@@ -39,7 +39,7 @@ class Collector(threading.Thread):
self
.
_logger
.
info
(
"Adding file "
+
"{} to the database."
.
format
(
path
))
try
:
retrieval
.
create
(
Table
.
r
ecording
,
mbid
,
path
)
retrieval
.
create
(
R
ecording
,
mbid
,
path
)
retrieval
.
commit
()
except
musicbrainzngs
.
WebServiceError
as
exc
:
self
.
_logger
.
error
(
"Could not connect to Musicbrainz. "
...
...
@@ -51,10 +51,10 @@ class Collector(threading.Thread):
# non nullable attributes
release
.
gid
=
result
[
'id'
]
release
.
name
=
result
[
'title'
]
release
.
artist_credit
=
retrieval
.
create
(
Table
.
a
rtist
_c
redit
,
release
.
artist_credit
=
retrieval
.
create
(
A
rtist
C
redit
,
result
[
'artist-credit'
],
result
[
'artist-credit-phrase'
])
release
.
release_group
=
retrieval
.
create
(
Table
.
r
elease
_g
roup
,
release
.
release_group
=
retrieval
.
create
(
R
elease
G
roup
,
result
[
'release-group'
][
'id'
])
# nullable attributes
...
...
@@ -63,7 +63,7 @@ class Collector(threading.Thread):
# extended mapping
for
medium
in
result
[
'medium-list'
]:
retrieval
.
create
(
Table
.
m
edium
,
release
,
medium
[
'position'
],
medium
)
retrieval
.
create
(
M
edium
,
release
,
medium
[
'position'
],
medium
)
def
_recording_mapping
(
recording
,
result
,
mbid
,
path
):
# non nullable attributes
...
...
@@ -72,7 +72,7 @@ class Collector(threading.Thread):
recording
.
fgid
=
mbid
recording
.
path
=
path
recording
.
ftype
=
os
.
path
.
splitext
(
path
)[
1
][
1
:]
recording
.
artist_credit
=
retrieval
.
create
(
Table
.
a
rtist
_c
redit
,
recording
.
artist_credit
=
retrieval
.
create
(
A
rtist
C
redit
,
result
[
'artist-credit'
],
result
[
'artist-credit-phrase'
])
...
...
@@ -82,17 +82,17 @@ class Collector(threading.Thread):
# extended mapping
for
releasedata
in
result
[
'release-list'
]:
release
=
retrieval
.
create
(
Table
.
r
elease
,
releasedata
[
'id'
])
release
=
retrieval
.
create
(
R
elease
,
releasedata
[
'id'
])
# Find track in release.
# This is clearly a workaround and only works efficiently
# because of caching. Correct way would be to fetch all tracks
# directly, but the musicbrainz api offers no way to do this.
mediumlist
=
retrieval
.
get_table_by_id
(
releasedata
[
'id'
],
Table
.
r
elease
)[
'medium-list'
]
releasedata
[
'id'
],
R
elease
)[
'medium-list'
]
for
medium
in
mediumlist
:
for
track
in
medium
[
'track-list'
]:
if
track
[
'recording'
][
'id'
]
==
result
[
'id'
]:
retrieval
.
create
(
T
able
.
t
rack
,
track
[
'id'
],
release
,
retrieval
.
create
(
Track
,
track
[
'id'
],
release
,
recording
,
medium
,
track
)
def
_artist_mapping
(
artist
,
result
,
mbid
):
...
...
@@ -104,7 +104,7 @@ class Collector(threading.Thread):
def
_release_group_mapping
(
release_group
,
result
,
mbid
):
release_group
.
gid
=
result
[
'id'
]
release_group
.
name
=
result
[
'title'
]
release_group
.
artist_credit
=
retrieval
.
create
(
Table
.
a
rtist
_c
redit
,
release_group
.
artist_credit
=
retrieval
.
create
(
A
rtist
C
redit
,
result
[
'artist-credit'
],
result
[
'artist-credit-phrase'
])
...
...
@@ -120,7 +120,7 @@ class Collector(threading.Thread):
# extended mapping
for
idx
,
(
data
,
joinphrase
)
in
enumerate
(
pairwise
(
acresult
+
[
''
])):
data
[
'joinphrase'
]
=
joinphrase
retrieval
.
create
(
Table
.
a
rtist
_c
redit
_n
ame
,
retrieval
.
create
(
A
rtist
C
redit
N
ame
,
artist_credit
,
idx
+
1
,
# SQL IDs begin with 1
data
)
...
...
@@ -132,7 +132,7 @@ class Collector(threading.Thread):
# non nullable attributes
acn
.
artist_credit
=
artist_credit
acn
.
position
=
position
acn
.
artist
=
retrieval
.
create
(
Table
.
a
rtist
,
data
[
'artist'
][
'id'
])
acn
.
artist
=
retrieval
.
create
(
A
rtist
,
data
[
'artist'
][
'id'
])
acn
.
name
=
data
[
'name'
]
if
'name'
in
data
else
data
[
'artist'
][
'name'
]
acn
.
join_phrase
=
data
[
'joinphrase'
]
...
...
@@ -142,7 +142,7 @@ class Collector(threading.Thread):
medium
.
position
=
medium_data
[
'position'
]
medium
.
track_count
=
medium_data
[
'track-count'
]
if
'format'
in
medium_data
:
medium
.
format
=
retrieval
.
create
(
Table
.
m
edium
_f
ormat
,
medium
.
format
=
retrieval
.
create
(
M
edium
F
ormat
,
medium_data
[
'format'
])
def
_medium_format_mapping
(
medium_format
,
name
):
...
...
@@ -157,7 +157,7 @@ class Collector(threading.Thread):
track
.
number
=
data
[
'number'
]
track
.
recording
=
recording
track
.
name
=
recording
.
name
track
.
medium
=
retrieval
.
create
(
Table
.
m
edium
,
release
,
track
.
medium
=
retrieval
.
create
(
M
edium
,
release
,
medium_data
[
'position'
],
medium_data
)
track
.
artist_credit
=
recording
.
artist_credit
track
.
artist_credit
.
ref_count
+=
1
...
...
@@ -165,54 +165,44 @@ class Collector(threading.Thread):
track
.
length
=
data
[
'length'
]
_structure
=
{
Table
.
release
:
Entity
(
parameter
=
(
'mbid'
,),
mb_class
=
Release
,
query_filter
=
Filter
(
gid
=
'mbid'
),
web
=
'mbid'
,
mapping
=
_release_mapping
),
Table
.
recording
:
Entity
(
parameter
=
(
'mbid'
,
'path'
),
mb_class
=
Recording
,
query_filter
=
Filter
(
gid
=
'mbid'
),
web
=
'mbid'
,
mapping
=
_recording_mapping
),
Table
.
artist
:
Entity
(
parameter
=
(
'mbid'
,),
mb_class
=
Artist
,
Release
:
Entity
(
parameter
=
(
'mbid'
,),
query_filter
=
Filter
(
gid
=
'mbid'
),
web
=
'mbid'
,
mapping
=
_release_mapping
),
Recording
:
Entity
(
parameter
=
(
'mbid'
,
'path'
),
query_filter
=
Filter
(
gid
=
'mbid'
),
web
=
'mbid'
,
mapping
=
_recording_mapping
),
Artist
:
Entity
(
parameter
=
(
'mbid'
,),
query_filter
=
Filter
(
gid
=
'mbid'
),
web
=
'mbid'
,
mapping
=
_artist_mapping
),
ReleaseGroup
:
Entity
(
parameter
=
(
'mbid'
,),
query_filter
=
Filter
(
gid
=
'mbid'
),
web
=
'mbid'
,
mapping
=
_artist_mapping
),
Table
.
release_group
:
Entity
(
parameter
=
(
'mbid'
,),
mb_class
=
ReleaseGroup
,
query_filter
=
Filter
(
gid
=
'mbid'
),
web
=
'mbid'
,
mapping
=
_release_group_mapping
),
Table
.
artist_credit
:
Entity
(
parameter
=
(
'acresult'
,
'acphrase'
),
mb_class
=
ArtistCredit
,
query_filter
=
Filter
(
name
=
'acphrase'
),
web
=
None
,
mapping
=
_artist_credit_mapping
,
reverse_mapping
=
_artist_credit_r_mapping
),
Table
.
artist_credit_name
:
Entity
(
parameter
=
(
'artist_credit'
,
'position'
,
'data'
),
mb_class
=
ArtistCreditName
,
query_filter
=
Filter
(
artist_credit
=
'artist_credit'
,
position
=
'position'
),
web
=
None
,
mapping
=
_artist_credit_name_mapping
),
Table
.
medium
:
Entity
(
parameter
=
(
'release'
,
'position'
,
'medium_data'
),
mb_class
=
Medium
,
query_filter
=
Filter
(
release
=
'release'
,
position
=
'position'
),
mapping
=
_release_group_mapping
),
ArtistCredit
:
Entity
(
parameter
=
(
'acresult'
,
'acphrase'
),
query_filter
=
Filter
(
name
=
'acphrase'
),
web
=
None
,
mapping
=
_medium_mapping
),
Table
.
medium_format
:
Entity
(
parameter
=
(
'name'
,),
mb_class
=
MediumFormat
,
query_filter
=
Filter
(
name
=
'name'
),
web
=
None
,
mapping
=
_medium_format_mapping
),
Table
.
track
:
Entity
(
parameter
=
(
'mbid'
,
'release'
,
'recording'
,
'medium_data'
,
'data'
),
mb_class
=
Track
,
query_filter
=
Filter
(
gid
=
'mbid'
),
web
=
None
,
mapping
=
_track_mapping
)}
mapping
=
_artist_credit_mapping
,
reverse_mapping
=
_artist_credit_r_mapping
),
ArtistCreditName
:
Entity
(
parameter
=
(
'artist_credit'
,
'position'
,
'data'
),
query_filter
=
Filter
(
artist_credit
=
'artist_credit'
,
position
=
'position'
),
web
=
None
,
mapping
=
_artist_credit_name_mapping
),
Medium
:
Entity
(
parameter
=
(
'release'
,
'position'
,
'medium_data'
),
query_filter
=
Filter
(
release
=
'release'
,
position
=
'position'
),
web
=
None
,
mapping
=
_medium_mapping
),
MediumFormat
:
Entity
(
parameter
=
(
'name'
,),
query_filter
=
Filter
(
name
=
'name'
),
web
=
None
,
mapping
=
_medium_format_mapping
),
Track
:
Entity
(
parameter
=
(
'mbid'
,
'release'
,
'recording'
,
'medium_data'
,
'data'
),
query_filter
=
Filter
(
gid
=
'mbid'
),
web
=
None
,
mapping
=
_track_mapping
)}
retrieval/__init__.py
View file @
1bba597d
...
...
@@ -6,7 +6,7 @@ and web retrieval tasks in a generic way.
It was written mainly to avoid doubling code and provide a clean interface.
The structure has to be a dict with an
entry of Table
as key and an Entity
The structure has to be a dict with an
ORM table class
as key and an Entity
as value. See the documentation of Entity for further information.
To use the module, the first call must be init() with the structure and
...
...
@@ -14,12 +14,11 @@ a session class (not an instance). Then several create() calls could be made.
The last action is commit(), to commit all to the database.
"""
from
retrieval.fetcher
import
get_table_by_id
from
retrieval.entity
import
Entity
,
Filter
,
Table
from
retrieval.entity
import
Entity
,
Filter
from
retrieval.helper
import
map_quality
,
fake_id
from
retrieval.retrieval
import
Retrieval
__all__
=
[
'get_table_by_id'
,
'Table'
,
'Entity'
,
'Filter'
,
'create'
,
'init'
,
'commit'
,
__all__
=
[
'get_table_by_id'
,
'Entity'
,
'Filter'
,
'create'
,
'init'
,
'commit'
,
'map_quality'
,
'fake_id'
]
_retrieval
=
False
...
...
@@ -45,8 +44,8 @@ def create(table, *args):
"""Create a table object (row in the database).
Arguments:
table -- The type of the table entry that should be created. Has to be
of
type Table
.
table -- The type of the table entry that should be created. Has to be
an
ORM table class
.
*args -- All arguments necessary for the specific table.
This function returns a valid table object. If the object is in the cache
...
...
retrieval/entity.py
View file @
1bba597d
import
enum
from
collections
import
namedtuple
...
...
@@ -8,8 +6,8 @@ def dummy(*args, **kwargs):
pass
Entity_
=
namedtuple
(
'Entity'
,
[
'parameter'
,
'mb_class'
,
'query_filter'
,
'web'
,
'mapping'
,
'reverse_mapping'
])
Entity_
=
namedtuple
(
'Entity'
,
[
'parameter'
,
'query_filter'
,
'web'
,
'mapping'
,
'reverse_mapping'
])
# set reverse_mapping default to dummy and make it optional
Entity_
.
__new__
.
__defaults__
=
(
dummy
,
)
...
...
@@ -21,7 +19,6 @@ class Entity(Entity_):
Attributes:
parameter -- tuple of strings, that code the arguments later
committed.
mb_class -- The ORM Table class, that is worked with.
query_filter -- The Filter object necessary for the checking SQL query.
web -- The codification of the ID attribute for the web query.
Must be present in parameter, too.
...
...
@@ -61,16 +58,3 @@ class Filter():
filter.
"""
return
self
.
_kwargs
.
copy
()
class
Table
(
enum
.
Enum
):
"""All supported tables."""
artist
=
1
artist_credit
=
2
artist_credit_name
=
3
medium
=
4
medium_format
=
5
recording
=
6
release
=
7
release_group
=
8
track
=
9
retrieval/fetcher.py
View file @
1bba597d
...
...
@@ -6,14 +6,14 @@ import threading
import
settings
from
retrieval.entity
import
Tabl
e
from
mbdata.models
import
Recording
,
ReleaseGroup
,
Artist
,
Releas
e
""" Fetches the musicdata and caches them. Could only exist once.
Use it with get_table_by_id() and clean_cache().
"""
web
=
[
Table
.
r
ecording
,
Table
.
r
elease
_g
roup
,
Table
.
artist
,
Table
.
r
elease
]
web
=
[
R
ecording
,
R
elease
G
roup
,
Artist
,
R
elease
]
musicbrainzngs
.
set_useragent
(
"brainzfs"
,
"0.1-alpha"
,
"https://git.finf.uni-hannover.de/Chrysops/brainzfs"
)
...
...
@@ -58,10 +58,10 @@ def _get_release(mbid):
'recordings'
,
'release-groups'
])[
'release'
]
_methods
=
{
Table
.
r
ecording
:
_get_recording
,
Table
.
r
elease
_g
roup
:
_get_release_group
,
Table
.
a
rtist
:
_get_artist
,
Table
.
r
elease
:
_get_release
}
_methods
=
{
R
ecording
:
_get_recording
,
R
elease
G
roup
:
_get_release_group
,
A
rtist
:
_get_artist
,
R
elease
:
_get_release
}
def
_time_ms
():
...
...
@@ -111,7 +111,7 @@ def get_table_by_id(mbid, tablename):
Arguments:
mbid -- The id that should be fetched.
tablename -- An
entry of retrieval.Table
to specify the table.
tablename -- An
ORM table class
to specify the table.
"""
if
tablename
not
in
web
:
raise
(
"Error: no Web lookup possible for "
+
str
(
tablename
))
...
...
retrieval/retrieval.py
View file @
1bba597d
...
...
@@ -64,12 +64,12 @@ class Retrieval():
# search the database if object exists
elif
self
.
_check_state
(
kwargs
.
values
()):
with
session_scope
(
self
.
_Session
)
as
session
:
rs
=
session
.
query
(
entity
.
mb_class
).
filter_by
(
**
kwargs
).
first
()
rs
=
session
.
query
(
table
).
filter_by
(
**
kwargs
).
first
()
if
rs
is
not
None
:
entity
.
reverse_mapping
(
rs
,
*
args
)
return
rs
obj
=
entity
.
mb_class
()
obj
=
table
()
# fetch the data from web
if
entity
.
web
is
not
None
:
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment