Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Gerion Entrup
brainzfs
Commits
619d0f3d
Commit
619d0f3d
authored
Aug 18, 2016
by
Gerion Entrup
Browse files
collector: restructuring, cleanup
parent
174d0868
Changes
2
Hide whitespace changes
Inline
Side-by-side
collector.py
View file @
619d0f3d
...
@@ -17,9 +17,6 @@ from mbdata.models import ArtistCredit, ArtistCreditName, Artist, Release, Relea
...
@@ -17,9 +17,6 @@ from mbdata.models import ArtistCredit, ArtistCreditName, Artist, Release, Relea
from
utils
import
pairwise
from
utils
import
pairwise
Paths
=
queue
.
Queue
(
maxsize
=
10
)
Paths
=
queue
.
Queue
(
maxsize
=
10
)
NEW
=
0
;
DEFECT
=
1
;
UPDATE
=
2
NewData
=
namedtuple
(
'NewData'
,
[
'mbid'
,
'path'
])
DefectData
=
namedtuple
(
'DefectData'
,
[
'mbid'
,
'table'
])
class
Collector
(
threading
.
Thread
):
class
Collector
(
threading
.
Thread
):
"""
"""
...
@@ -42,22 +39,18 @@ class Collector(threading.Thread):
...
@@ -42,22 +39,18 @@ class Collector(threading.Thread):
def
run
(
self
):
def
run
(
self
):
while
True
:
while
True
:
action
,
data
=
Paths
.
get
()
mbid
,
path
=
Paths
.
get
()
if
action
==
NEW
:
try
:
self
.
_logger
.
info
(
"Adding file {} to the database."
.
format
(
data
.
path
))
self
.
_logger
.
info
(
"Adding file {} to the database."
.
format
(
path
))
self
.
fetch_recording
(
data
.
mbid
,
data
.
path
)
self
.
fetch_recording
(
mbid
,
path
)
self
.
_session
.
commit
()
except
musicbrainzngs
.
WebServiceError
as
exc
:
elif
action
==
DEFECT
:
self
.
_session
.
rollback
()
self
.
_logger
.
info
(
"Repair broken entry with mbid {}"
.
format
(
data
))
self
.
_logger
.
error
(
"Could not connect to Musicbrainz. Path: {} Request: {}"
.
format
(
path
,
exc
))
#TODO
self
.
_session
.
commit
()
elif
action
==
UPDATE
:
self
.
_logger
.
info
(
"Update outdated entry with mbid {}"
.
format
(
data
))
#TODO
else
:
self
.
_logger
.
error
(
"Unknow action: {}"
.
format
(
action
))
Paths
.
task_done
()
Paths
.
task_done
()
self
.
_session
.
close
()
self
.
_session
.
close
()
def
create_artist_credit
(
self
,
acresult
,
acphrase
):
def
create_artist_credit
(
self
,
acresult
,
acphrase
):
ac
=
self
.
_session
.
query
(
ArtistCredit
).
filter_by
(
name
=
acphrase
).
first
()
ac
=
self
.
_session
.
query
(
ArtistCredit
).
filter_by
(
name
=
acphrase
).
first
()
if
ac
is
None
:
if
ac
is
None
:
...
@@ -130,37 +123,31 @@ class Collector(threading.Thread):
...
@@ -130,37 +123,31 @@ class Collector(threading.Thread):
def
fetch_recording
(
self
,
mbid
,
path
):
def
fetch_recording
(
self
,
mbid
,
path
):
recording
=
self
.
_session
.
query
(
Recording
).
filter_by
(
gid
=
mbid
).
first
()
recording
=
self
.
_session
.
query
(
Recording
).
filter_by
(
gid
=
mbid
).
first
()
if
recording
is
None
:
if
recording
is
None
:
try
:
result
=
fetcher
.
get_table_by_id
(
mbid
,
'recording'
)
result
=
fetcher
.
get_table_by_id
(
mbid
,
'recording'
)
#minimal mapping
#minimal mapping
recording
=
Recording
()
recording
=
Recording
()
recording
.
gid
=
result
[
'id'
]
recording
.
gid
=
result
[
'id'
]
recording
.
name
=
result
[
'title'
]
recording
.
name
=
result
[
'title'
]
recording
.
path
=
path
recording
.
path
=
path
recording
.
ftype
=
path
.
split
(
'.'
)[
-
1
]
recording
.
ftype
=
path
.
split
(
'.'
)[
-
1
]
recording
.
artist_credit
=
self
.
create_artist_credit
(
result
[
'artist-credit'
],
result
[
'artist-credit-phrase'
])
recording
.
artist_credit
=
self
.
create_artist_credit
(
result
[
'artist-credit'
],
result
[
'artist-credit-phrase'
])
if
'length'
in
result
:
if
'length'
in
result
:
recording
.
length
=
result
[
'length'
]
recording
.
length
=
result
[
'length'
]
self
.
_session
.
add
(
recording
)
self
.
_session
.
add
(
recording
)
#extended mapping
#extended mapping
for
releasedata
in
result
[
'release-list'
]:
for
releasedata
in
result
[
'release-list'
]:
release
=
self
.
fetch_release
(
releasedata
[
'id'
])
release
=
self
.
fetch_release
(
releasedata
[
'id'
])
# find track in release,
# find track in release,
# this is clearly a workaround and only works efficient because of caching.
# this is clearly a workaround and only works efficient because of caching.
# correct way would be to fetch all tracks directly, but the musicbrainz api
# correct way would be to fetch all tracks directly, but the musicbrainz api
# offers no way to do this.
# offers no way to do this.
mediumlist
=
fetcher
.
get_table_by_id
(
releasedata
[
'id'
],
'release'
)[
'medium-list'
]
mediumlist
=
fetcher
.
get_table_by_id
(
releasedata
[
'id'
],
'release'
)[
'medium-list'
]
for
medium
in
mediumlist
:
for
medium
in
mediumlist
:
for
track
in
medium
[
'track-list'
]:
for
track
in
medium
[
'track-list'
]:
if
track
[
'recording'
][
'id'
]
==
mbid
:
if
track
[
'recording'
][
'id'
]
==
mbid
:
self
.
create_track
(
track
,
release
,
recording
,
medium
)
self
.
create_track
(
track
,
release
,
recording
,
medium
)
self
.
_session
.
commit
()
except
musicbrainzngs
.
WebServiceError
as
exc
:
self
.
_session
.
rollback
()
self
.
_logger
.
error
(
"Could not connect to Musicbrainz. Path: {} Request: {}"
.
format
(
path
,
exc
))
recording
=
None
return
recording
return
recording
...
...
walker.py
View file @
619d0f3d
...
@@ -7,7 +7,7 @@ from sqlalchemy.orm import Session
...
@@ -7,7 +7,7 @@ from sqlalchemy.orm import Session
from
model
import
Recording
from
model
import
Recording
from
utils
import
pairwise
from
utils
import
pairwise
from
collector
import
Paths
,
NEW
,
NewData
from
collector
import
Paths
class
Walker
(
threading
.
Thread
):
class
Walker
(
threading
.
Thread
):
def
__init__
(
self
,
session_fac
,
sourcedir
):
def
__init__
(
self
,
session_fac
,
sourcedir
):
...
@@ -36,7 +36,7 @@ class Walker(threading.Thread):
...
@@ -36,7 +36,7 @@ class Walker(threading.Thread):
if
mut
is
not
None
:
if
mut
is
not
None
:
mbid
=
self
.
get_mbid
(
mut
)
mbid
=
self
.
get_mbid
(
mut
)
if
mbid
is
not
None
and
mbid
not
in
self
.
_mbids
:
if
mbid
is
not
None
and
mbid
not
in
self
.
_mbids
:
Paths
.
put
((
NEW
,
NewData
(
mbid
=
mbid
,
path
=
os
.
path
.
abspath
(
filepath
)))
)
Paths
.
put
((
mbid
,
os
.
path
.
abspath
(
filepath
)))
self
.
_mbids
.
add
(
mbid
)
self
.
_mbids
.
add
(
mbid
)
def
get_mbid
(
self
,
mut
):
def
get_mbid
(
self
,
mut
):
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment