Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Fjen Undso
feedproxy
Commits
0971a492
Commit
0971a492
authored
Jul 15, 2020
by
Fjen Undso
Browse files
cleanup
parent
bc3e0aff
Changes
1
Hide whitespace changes
Inline
Side-by-side
main.py
View file @
0971a492
...
@@ -15,13 +15,16 @@ from requests.exceptions import RequestException
...
@@ -15,13 +15,16 @@ from requests.exceptions import RequestException
from
requests_html
import
HTMLSession
,
HTMLResponse
from
requests_html
import
HTMLSession
,
HTMLResponse
from
feedgen.feed
import
FeedGenerator
from
feedgen.feed
import
FeedGenerator
LWN_LOGIN_URL
=
"https://lwn.net/Login/"
LWN_RSS_URL
=
"https://lwn.net/headlines/newrss"
LWN_USERNAME
=
os
.
environ
.
get
(
"LWN_USERNAME"
)
LWN_PASSWORD
=
os
.
environ
.
get
(
"LWN_PASSWORD"
)
MYDEALZ_RSS_URL
=
"https://www.mydealz.de/rss/hot"
RE_MD_HOTSCORE
=
re
.
compile
(
r
"^(\d+)"
)
RE_MD_HOTSCORE
=
re
.
compile
(
r
"^(\d+)"
)
RE_MD_ID
=
re
.
compile
(
r
"(\d+)$"
)
RE_MD_ID
=
re
.
compile
(
r
"(\d+)$"
)
s
=
HTMLSession
()
s
=
HTMLSession
()
app
=
Flask
(
__name__
)
app
=
Flask
(
__name__
)
LWN_USERNAME
=
os
.
environ
.
get
(
"LWN_USERNAME"
)
LWN_PASSWORD
=
os
.
environ
.
get
(
"LWN_PASSWORD"
)
@
dataclass
@
dataclass
...
@@ -36,7 +39,7 @@ class Deal:
...
@@ -36,7 +39,7 @@ class Deal:
@
property
@
property
def
content
(
self
)
->
str
:
def
content
(
self
)
->
str
:
return
get_
content
(
self
.
url
,
".userHtml-content"
)
return
get_
html
(
self
.
url
,
".userHtml-content"
)
@
property
@
property
def
score
(
self
)
->
int
:
def
score
(
self
)
->
int
:
...
@@ -72,24 +75,27 @@ class DealzFeed:
...
@@ -72,24 +75,27 @@ class DealzFeed:
self
.
refresh
:
timedelta
=
timedelta
(
minutes
=
refresh_minutes
)
self
.
refresh
:
timedelta
=
timedelta
(
minutes
=
refresh_minutes
)
self
.
feed_db
:
Dict
[
int
,
Deal
]
=
dict
()
self
.
feed_db
:
Dict
[
int
,
Deal
]
=
dict
()
def
_get_score_from_txt
(
self
,
txt
:
str
)
->
int
:
@
staticmethod
def
_get_score_from_txt
(
txt
:
str
)
->
int
:
"""Extract score from from a text/title."""
"""Extract score from from a text/title."""
t
=
RE_MD_HOTSCORE
.
search
(
txt
)
t
=
RE_MD_HOTSCORE
.
search
(
txt
)
return
int
(
t
.
group
(
0
))
if
t
else
0
return
int
(
t
.
group
(
0
))
if
t
else
0
def
_get_id
(
self
,
id_str
:
str
)
->
int
:
@
staticmethod
def
_get_id
(
id_str
:
str
)
->
int
:
"""Extract the deal ID from a text."""
"""Extract the deal ID from a text."""
t
=
RE_MD_ID
.
search
(
id_str
)
t
=
RE_MD_ID
.
search
(
id_str
)
return
int
(
t
.
group
(
0
))
if
t
else
0
return
int
(
t
.
group
(
0
))
if
t
else
0
def
_get_url
(
self
,
url
)
->
HTMLResponse
:
@
staticmethod
def
_get_url
(
url
)
->
HTMLResponse
:
r
=
s
.
get
(
url
)
r
=
s
.
get
(
url
)
r
.
raise_for_status
()
r
.
raise_for_status
()
return
r
return
r
def
_ingest_rss
(
self
)
->
None
:
def
_ingest_rss
(
self
)
->
None
:
"""Get new dealz from feed and update scores."""
"""Get new dealz from feed and update scores."""
f
=
feedparser
.
parse
(
"https://www.mydealz.de/rss/hot"
)
f
=
feedparser
.
parse
(
MYDEALZ_RSS_URL
)
new_dealz
=
[
new_dealz
=
[
self
.
_get_id
(
e
.
id
)
self
.
_get_id
(
e
.
id
)
for
e
in
f
.
entries
for
e
in
f
.
entries
...
@@ -173,7 +179,7 @@ dealz = DealzFeed(expire_hours=3)
...
@@ -173,7 +179,7 @@ dealz = DealzFeed(expire_hours=3)
@
lru_cache
(
maxsize
=
1000
)
@
lru_cache
(
maxsize
=
1000
)
def
get_
content
(
url
:
str
,
search
:
str
)
->
str
:
def
get_
html
(
url
:
str
,
search
:
str
)
->
str
:
try
:
try
:
r
=
s
.
get
(
url
)
r
=
s
.
get
(
url
)
r
.
raise_for_status
()
r
.
raise_for_status
()
...
@@ -188,11 +194,9 @@ def mydealz_feeds(score: int) -> Response:
...
@@ -188,11 +194,9 @@ def mydealz_feeds(score: int) -> Response:
for
k
,
v
in
request
.
args
.
items
():
for
k
,
v
in
request
.
args
.
items
():
if
not
v
.
isdigit
():
if
not
v
.
isdigit
():
return
Response
(
response
=
f
'Parameter "
{
k
}
" is not an int!'
,
status
=
400
)
return
Response
(
response
=
f
'Parameter "
{
k
}
" is not an int!'
,
status
=
400
)
feed
=
FeedGenerator
()
feed
=
FeedGenerator
()
feed
.
title
(
"MyDealz"
)
feed
.
title
(
"MyDealz"
)
feed
.
id
(
"https://mydealz.de"
)
feed
.
id
(
"https://mydealz.de"
)
for
d_id
,
d
in
dealz
.
get_dealz
():
for
d_id
,
d
in
dealz
.
get_dealz
():
if
not
d
.
is_hot
(
target_score
=
score
,
kw_scores
=
request
.
args
):
if
not
d
.
is_hot
(
target_score
=
score
,
kw_scores
=
request
.
args
):
continue
continue
...
@@ -208,12 +212,11 @@ def mydealz_feeds(score: int) -> Response:
...
@@ -208,12 +212,11 @@ def mydealz_feeds(score: int) -> Response:
def
lwn_set_cookie
()
->
None
:
def
lwn_set_cookie
()
->
None
:
lwn_login
=
"https://lwn.net/Login/"
cookie
=
[
x
for
x
in
s
.
cookies
if
x
.
name
==
"LWNSession1"
]
cookie
=
[
x
for
x
in
s
.
cookies
if
x
.
name
==
"LWNSession1"
]
if
not
(
cookie
and
cookie
[
0
].
expires
>
time
()):
if
not
(
cookie
and
cookie
[
0
].
expires
>
time
()):
data
=
{
"Username"
:
LWN_USERNAME
,
"Password"
:
LWN_PASSWORD
}
data
=
{
"Username"
:
LWN_USERNAME
,
"Password"
:
LWN_PASSWORD
}
try
:
try
:
r
=
s
.
post
(
lwn_login
,
data
=
data
)
r
=
s
.
post
(
LWN_LOGIN_URL
,
data
=
data
)
r
.
raise_for_status
()
r
.
raise_for_status
()
except
RequestException
:
except
RequestException
:
logger
.
exception
(
"LWN cookie fetch failed"
)
logger
.
exception
(
"LWN cookie fetch failed"
)
...
@@ -227,20 +230,15 @@ def lwn_is_paid(title: str) -> bool:
...
@@ -227,20 +230,15 @@ def lwn_is_paid(title: str) -> bool:
@
app
.
route
(
"/lwn"
)
@
app
.
route
(
"/lwn"
)
def
lwn_feeds
()
->
Response
:
def
lwn_feeds
()
->
Response
:
lwn_set_cookie
()
lwn_set_cookie
()
f
=
feedparser
.
parse
(
LWN_RSS_URL
)
lwn_url
=
"https://lwn.net/headlines/newrss"
f
=
feedparser
.
parse
(
lwn_url
)
feed
=
FeedGenerator
()
feed
=
FeedGenerator
()
feed
.
title
(
f
.
feed
.
title
)
feed
.
title
(
f
.
feed
.
title
)
feed
.
id
(
"https://lwn.net"
)
feed
.
id
(
"https://lwn.net"
)
for
fe
in
f
.
entries
:
for
fe
in
f
.
entries
:
content
=
fe
.
summary
content
=
fe
.
summary
link_url
=
fe
.
link
.
replace
(
"/rss"
,
""
)
link_url
=
fe
.
link
.
replace
(
"/rss"
,
""
)
if
lwn_is_paid
(
fe
.
title
):
if
lwn_is_paid
(
fe
.
title
):
content
=
get_content
(
link_url
,
".ArticleText"
)
content
=
get_html
(
link_url
,
".ArticleText"
)
e
=
feed
.
add_entry
()
e
=
feed
.
add_entry
()
e
.
id
(
fe
.
id
)
e
.
id
(
fe
.
id
)
e
.
title
(
fe
.
title
)
e
.
title
(
fe
.
title
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment