Commit 0971a492 authored by Fjen Undso's avatar Fjen Undso
Browse files

cleanup

parent bc3e0aff
......@@ -15,13 +15,16 @@ from requests.exceptions import RequestException
from requests_html import HTMLSession, HTMLResponse
from feedgen.feed import FeedGenerator
# LWN endpoints: the login form that lwn_set_cookie() posts to, and the
# headline feed that lwn_feeds() parses.
LWN_LOGIN_URL = "https://lwn.net/Login/"
LWN_RSS_URL = "https://lwn.net/headlines/newrss"
# LWN credentials are read from the environment; os.environ.get() yields
# None when a variable is unset.
LWN_USERNAME = os.environ.get("LWN_USERNAME")
LWN_PASSWORD = os.environ.get("LWN_PASSWORD")
# MyDealz "hot" feed parsed when ingesting new deals.
MYDEALZ_RSS_URL = "https://www.mydealz.de/rss/hot"
# Matches a run of digits at the very start of a text (used to read a score).
RE_MD_HOTSCORE = re.compile(r"^(\d+)")
# Matches a run of digits at the very end of a text (used to read a deal ID).
RE_MD_ID = re.compile(r"(\d+)$")
# Shared HTTP session: used by the fetch helpers and holds the LWN login cookie.
s = HTMLSession()
# Flask application on which the route handlers are registered.
app = Flask(__name__)
LWN_USERNAME = os.environ.get("LWN_USERNAME")
LWN_PASSWORD = os.environ.get("LWN_PASSWORD")
@dataclass
......@@ -36,7 +39,7 @@ class Deal:
@property
def content(self) -> str:
return get_content(self.url, ".userHtml-content")
return get_html(self.url, ".userHtml-content")
@property
def score(self) -> int:
......@@ -72,24 +75,27 @@ class DealzFeed:
self.refresh: timedelta = timedelta(minutes=refresh_minutes)
self.feed_db: Dict[int, Deal] = dict()
def _get_score_from_txt(self, txt: str) -> int:
@staticmethod
def _get_score_from_txt(txt: str) -> int:
    """Return the number at the start of a text/title, or 0 if there is none."""
    match = RE_MD_HOTSCORE.search(txt)
    if match is None:
        return 0
    return int(match.group(0))
def _get_id(self, id_str: str) -> int:
@staticmethod
def _get_id(id_str: str) -> int:
    """Return the number at the end of a text as the deal ID, or 0 if there is none."""
    match = RE_MD_ID.search(id_str)
    if match is None:
        return 0
    return int(match.group(0))
def _get_url(self, url) -> HTMLResponse:
@staticmethod
def _get_url(url: str, timeout: float = 30.0) -> HTMLResponse:
    """Fetch *url* through the shared session and return the response.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block the caller
            indefinitely.

    Raises:
        requests.exceptions.RequestException: On connection errors, on
            timeout, or (via ``raise_for_status``) on a 4xx/5xx response.
    """
    response = s.get(url, timeout=timeout)
    response.raise_for_status()
    return response
def _ingest_rss(self) -> None:
"""Get new dealz from feed and update scores."""
f = feedparser.parse("https://www.mydealz.de/rss/hot")
f = feedparser.parse(MYDEALZ_RSS_URL)
new_dealz = [
self._get_id(e.id)
for e in f.entries
......@@ -173,7 +179,7 @@ dealz = DealzFeed(expire_hours=3)
@lru_cache(maxsize=1000)
def get_content(url: str, search: str) -> str:
def get_html(url: str, search: str) -> str:
try:
r = s.get(url)
r.raise_for_status()
......@@ -188,11 +194,9 @@ def mydealz_feeds(score: int) -> Response:
for k, v in request.args.items():
if not v.isdigit():
return Response(response=f'Parameter "{k}" is not an int!', status=400)
feed = FeedGenerator()
feed.title("MyDealz")
feed.id("https://mydealz.de")
for d_id, d in dealz.get_dealz():
if not d.is_hot(target_score=score, kw_scores=request.args):
continue
......@@ -208,12 +212,11 @@ def mydealz_feeds(score: int) -> Response:
def lwn_set_cookie() -> None:
lwn_login = "https://lwn.net/Login/"
cookie = [x for x in s.cookies if x.name == "LWNSession1"]
if not (cookie and cookie[0].expires > time()):
data = {"Username": LWN_USERNAME, "Password": LWN_PASSWORD}
try:
r = s.post(lwn_login, data=data)
r = s.post(LWN_LOGIN_URL, data=data)
r.raise_for_status()
except RequestException:
logger.exception("LWN cookie fetch failed")
......@@ -227,20 +230,15 @@ def lwn_is_paid(title: str) -> bool:
@app.route("/lwn")
def lwn_feeds() -> Response:
lwn_set_cookie()
lwn_url = "https://lwn.net/headlines/newrss"
f = feedparser.parse(lwn_url)
f = feedparser.parse(LWN_RSS_URL)
feed = FeedGenerator()
feed.title(f.feed.title)
feed.id("https://lwn.net")
for fe in f.entries:
content = fe.summary
link_url = fe.link.replace("/rss", "")
if lwn_is_paid(fe.title):
content = get_content(link_url, ".ArticleText")
content = get_html(link_url, ".ArticleText")
e = feed.add_entry()
e.id(fe.id)
e.title(fe.title)
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment