Commit 2a27b2a1 authored by aal's avatar aal
Browse files

funktioniert??? ein wenig

parent fb86b3ec
*.idea
*.pyc
Files/
\ No newline at end of file
# Studsoiger
Der [Studsoiger](https://git.finf.uni-hannover.de/aal/studsauger) lädt alle
Dateien der eigenen Veranstaltungen aus [Stud.IP](http://www.studip.de/) mittels
der [Rest-API](http://hilfe.studip.de/api/namespace_r_e_s_t_a_p_i.html) herunter.
## Authentifizierung
Rest.IP unterstützt zur Authentifizierung `3-legged OAuth`, `Session` per Cookie
oder `HTTP Basic Authentication`. Nichts davon verwendet der studsoiger. Das
Passwort kann entweder per Kommandozeilenparameter übergeben werden, z.B. in
Verbindung mit dem Passwortmanager [pass](https://www.passwordstore.org/)
......
# TODO
- Alle Semester runterladen ist broken
#!/usr/bin/env python3
"""Launcher script: delegate to the package entry point.

Only studsauger.studsauger:main is imported (matching the console_scripts
entry in setup.py). Importing studsauger.__main__ here instead would run
its module-level main() call as a side effect of the import.
"""
from studsauger.studsauger import main

main()
......@@ -7,19 +7,19 @@ setup(
name='studsauger',
version='1.0',
description='Stud.IP download tool',
url='https://git.finf.uni-hannover.de/kiste/studsauger',
url='https://git.finf.uni-hannover.de/aal/studsauger',
author='Christian Buschau',
author_email='christian.buschau@stud.uni-hannover.de',
packages=['studsauger'],
install_requires=[
'requests'
'requests', 'lxml'
],
extras_require={
'Keyring support': ['keyring']
},
entry_points={
'console_scripts': [
'studsauger = studsauger:main'
'studsauger = studsauger.studsauger:main'
]
}
)
Metadata-Version: 2.1
Name: studsauger
Version: 1.0
Summary: Stud.IP download tool
Home-page: https://git.finf.uni-hannover.de/aal/studsauger
Author: Christian Buschau
Author-email: christian.buschau@stud.uni-hannover.de
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
Provides-Extra: Keyring support
README.md
studsauger/__main__.py
studsauger/studsauger.py
studsauger.egg-info/PKG-INFO
studsauger.egg-info/SOURCES.txt
studsauger.egg-info/dependency_links.txt
studsauger.egg-info/entry_points.txt
studsauger.egg-info/requires.txt
studsauger.egg-info/top_level.txt
\ No newline at end of file
[console_scripts]
studsauger = studsauger.studsauger:main
requests
lxml
[Keyring support]
keyring
# -*- coding: utf-8 -*-
import argparse
import json
import os

from studsauger import mkdir, download, sso_login
def main(args=None):
    """Command-line entry point for the Stud.IP downloader.

    Each setting is resolved from the command line first, then from an
    optional JSON config file (``$XDG_CONFIG_HOME/studsauger/config.json``
    or ``$HOME/.config/studsauger/config.json``), then from a default.
    Creates the target directory and performs the SSO login.

    Args:
        args: Optional list of argument strings for argparse; ``None``
            means ``sys.argv[1:]``.

    Raises:
        SystemExit: via ``exit(1)`` on any missing/contradictory setting.
    """
    # Keyring support is an optional extra (see setup.py); degrade gracefully.
    try:
        import keyring
    except ImportError:
        usekeyring = False
    else:
        usekeyring = True
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--path', help='Pfad für lokalen Dateibaum')
    parser.add_argument('-u', '--user', help='Stud.IP Benutzername')
    parser.add_argument('-pw', '--password', help='Stud.IP Passwort')
    parser.add_argument('-b', '--blacklist', help='Blacklist für Veranstaltungen, mehrmals angeben für mehrere Veranstaltungen', action='append')
    parser.add_argument('-i', '--ignore', help='Blacklist für Dateinamen als regex')
    parser.add_argument('-a', '--allsemesters', help='Alle Semester statt nur dem aktuellen', action="store_true")
    parser.add_argument('-c', '--config',
                        help='JSON-Konfigurationsdatei, wird ohne Angabe in $XDG_CONFIG_HOME/studsauger/config.json oder $HOME/.config/studsauger/config.json gesucht')
    # BUG FIX: pass the ``args`` parameter through; it was silently ignored
    # before (``parser.parse_args()``), so main() always parsed sys.argv.
    args = parser.parse_args(args)
    # Locate the config file: explicit -c wins, then $XDG_CONFIG_HOME, then ~/.config.
    configpath = None
    if args.config:
        if os.path.isfile(args.config):
            configpath = args.config
        else:
            print('Die Konfigurationsdatei "' + args.config + '" existiert nicht!')
            exit(1)
    elif 'XDG_CONFIG_HOME' in os.environ:
        if os.path.isfile(os.environ['XDG_CONFIG_HOME'] + '/studsauger/config.json'):
            configpath = os.environ['XDG_CONFIG_HOME'] + '/studsauger/config.json'
    elif 'HOME' in os.environ:
        if os.path.isfile(os.environ['HOME'] + '/.config/studsauger/config.json'):
            configpath = os.environ['HOME'] + '/.config/studsauger/config.json'
    if configpath:
        # Needs the file-level ``import json`` (it was missing before this fix).
        with open(configpath, 'r') as stream:
            config = json.load(stream)
    else:
        config = {}
    # Mandatory settings: command line beats config file.
    if args.path:
        path = args.path
    elif 'path' in config:
        path = config['path']
    else:
        print('Kein Pfad angegeben!')
        exit(1)
    if args.user:
        user = args.user
    elif 'user' in config:
        user = config['user']
    else:
        print('Kein Benutzer angegeben!')
        exit(1)
    # A password stored in the keyring takes precedence and must not be
    # duplicated on the command line or in the config file.
    keyringpw = None
    if usekeyring:
        keyringpw = keyring.get_password('studsauger', user)
    if keyringpw:
        password = keyringpw
        if args.password or 'password' in config:
            print('Passwort für Benutzer "' + user + '" ist im Keyring und darf nicht angegeben werden!')
            exit(1)
    elif args.password:
        password = args.password
    elif 'password' in config:
        password = config['password']
    else:
        print('Kein Passwort angegeben!')
        exit(1)
    # Optional settings.
    if args.blacklist:
        blacklist = args.blacklist
    elif 'blacklist' in config:
        blacklist = config['blacklist']
    else:
        blacklist = {}
    if args.ignore:
        ignore = args.ignore
    elif 'ignore' in config:
        ignore = config['ignore']
    else:
        ignore = None
    if args.allsemesters:
        allsemesters = True
    elif 'allsemesters' in config:
        allsemesters = config['allsemesters']
    else:
        allsemesters = False
    mkdir(path)
    sso_login(user, password)
# NOTE(review): this re-imports ``main`` from studsauger.studsauger and runs
# it at import time — presumably leftover from moving the CLI logic into
# studsauger.py; confirm which entry point is intended, since the import
# shadows the ``main`` defined above.
from .studsauger import main
main()
......@@ -20,20 +20,17 @@ BASEURL = 'https://studip.uni-hannover.de/api.php'
LOGIN_URL = "https://studip.uni-hannover.de/Shibboleth.sso/Login?target=https%3A%2F%2Fstudip.uni-hannover.de%2Findex.php%3Fagain%3Dyes%26sso%3Dshib"
SAML_URL = "https://studip.uni-hannover.de/Shibboleth.sso/SAML2/POST"
#TODO: mayyyybe do all this as an object?
def main(argv=None):
    """Entry point: read settings, log in via SSO and download all files.

    Removes leftover pre-rewrite lines that made this block incoherent:
    a stale ``parse_args(user, ...)`` call using the removed signature,
    dead debug prints of the credentials, and a duplicate ``scrape(session)``
    call alongside the new five-argument one.

    Args:
        argv: Unused; kept for interface compatibility — parse_args()
            reads sys.argv itself.
    """
    user, password, path, allsemesters, blacklist, ignore = parse_args()
    # sso_login returns the Seminar_Session cookie the REST API requires.
    cookie = sso_login(user, password)
    with requests.session() as session:
        session.cookies = requests.utils.cookiejar_from_dict(cookie)
        scrape(session, path, allsemesters, blacklist, ignore)
def parse_args(user, password, path, allsemesters, ignore, blacklist):
def parse_args():
try:
import keyring
except ImportError:
......@@ -50,10 +47,10 @@ def parse_args(user, password, path, allsemesters, ignore, blacklist):
parser.add_argument('-a', '--allsemesters', help='Alle Semester statt nur dem aktuellen', action="store_true")
parser.add_argument('-c', '--config', help='JSON-Konfigurationsdatei, wird ohne Angabe in $XDG_CONFIG_HOME/studsauger/config.json oder $HOME/.config/studsauger/config.json gesucht')
args = parser.parse_args()
configpath = None
if args.config:
if os.path.isfile(config):
if os.path.isfile(args.config):
configpath = args.config
else:
print('Konfigurationsdatei "' + args.config + '" existiert nicht.')
......@@ -71,6 +68,7 @@ def parse_args(user, password, path, allsemesters, ignore, blacklist):
else:
config = {}
path = None
if args.path:
path = args.path
elif 'path' in config:
......@@ -98,11 +96,36 @@ def parse_args(user, password, path, allsemesters, ignore, blacklist):
exit(1)
elif args.password:
password = args.password
elif 'password' in config:
password = config['password']
else:
print('Kein Passwort angegeben!')
exit(1)
if args.blacklist:
blacklist = args.blacklist
elif 'blacklist' in config:
blacklist = config['blacklist']
else:
blacklist = {}
if args.ignore:
ignore = args.ignore
elif 'ignore' in config:
ignore = config['ignore']
else:
ignore = None
if args.allsemesters:
allsemesters = True
elif 'allsemesters' in config:
allsemesters = config['allsemesters']
else:
allsemesters = False
return user, password, path, allsemesters, blacklist, ignore
#TODO: return session instead of cookie for max aesthetics
# beautiful POS, returns the Seminar_Session Cookie necessary for accessing the API
def sso_login(username, password):
# Start session and get login form.
with requests.session() as session:
......@@ -117,12 +140,7 @@ def sso_login(username, password):
response2 = session.post(SAML_URL, data=form2)
return dict(Seminar_Session=session.cookies['Seminar_Session'])
def scrape(session):
allsemesters = False
ignore = ''
path = './Test'
blacklist = {}
###
def scrape(session, path, allsemesters, blacklist, ignore):
json_user = session.get(BASEURL + '/user').json()
json_semesters = session.get(BASEURL + '/semesters', params='limit=1000').json()
json_courses = session.get(BASEURL + '/user/' + json_user['user_id'] + '/courses', params='limit=1000').json()
......@@ -146,34 +164,43 @@ def scrape(session):
database = {}
for key, course in json_courses['collection'].items():
if course['title'] in blacklist:
coursename = course['title']
if coursename in blacklist:
continue
print('=== ' + course['title'] + ' ===')
mkdir(path + '/' + course['title'].replace('/', '_'))
json_folders = session.get(BASEURL + '/course/' + course['course_id'] + '/top_folder', params='limit=1000').json()
for folder in json_folders['subfolders']:
#TODO: go to subfolders as well
json_files = session.get(BASEURL + '/folder/' + folder['id']).json()
for document in json_files['file_refs']:
if re.match(ignore, document['name']):
continue
date = ''
if document['file_id'] in database:
if database[document['file_id']] == document['chdate']:
continue
else:
date = document['chdate']
database[document['file_id']] = document['chdate']
print(document['name'] + str(date))
target = path + '/' + course['title'].replace('/', '_') + '/' + document['name'] + str(date)
download = session.get(BASEURL + '/file/' + document['id'] + '/download', stream=True)
with open(target, 'wb') as downloadfile:
download.raw.decode_content = True
shutil.copyfileobj(download.raw, downloadfile)
print('=== ' + coursename + ' ===')
mkdir(path + '/' + coursename.replace('/', '_'))
json_files = session.get(BASEURL + '/course/' + course['course_id'] + '/top_folder', params='limit=1000').json()
traverseFileTree(json_files, path, ignore, database, session, coursename)
with open(path + '/.database.json', 'w') as databasefile:
json.dump(database, databasefile, indent=4)
def traverseFileTree(root, path, ignore, database, session, coursename):
    """Recursively download every file below *root* into the course directory.

    Args:
        root: Folder object from the Stud.IP REST API — a dict that may
            carry 'file_refs' (files) and 'subfolders' (child folders).
        path: Local base directory of the download tree.
        ignore: Optional regex string; file names matching it are skipped.
        database: Maps file_id -> chdate of the last downloaded revision;
            consulted to skip unchanged files and updated in place.
        session: Authenticated requests session holding the Seminar_Session
            cookie.
        coursename: Course title; '/' is replaced by '_' for the directory
            name.
    """
    ### download all files in the current folder
    if 'file_refs' in root:
        for fileref in root['file_refs']:
            if ignore and re.match(ignore, fileref['name']):
                continue
            date = ''
            if fileref['file_id'] in database:
                if database[fileref['file_id']] == fileref['chdate']:
                    continue  # unchanged since last run
                else:
                    # Changed file: append chdate so revisions don't overwrite.
                    date = fileref['chdate']
            database[fileref['file_id']] = fileref['chdate']
            print(fileref['name'] + str(date))
            target = path + '/' + coursename.replace('/', '_') + '/' + fileref['name'] + str(date)
            download = session.get(BASEURL + '/file/' + fileref['id'] + '/download', stream=True)
            with open(target, 'wb') as downloadfile:
                download.raw.decode_content = True
                shutil.copyfileobj(download.raw, downloadfile)
    ### look for subfolders
    if 'subfolders' in root and root['subfolders']:
        for subfolder in root['subfolders']:
            # BUG FIX: subfolder entries embedded in a parent listing do not
            # carry their own 'file_refs', so recursing on them directly found
            # nothing. Fetch the full folder object first, as the pre-rewrite
            # scrape() did via GET /folder/<id>.
            json_subfolder = session.get(BASEURL + '/folder/' + subfolder['id']).json()
            #TODO: implement making subfolders, for people that really like clicking a lot?????
            traverseFileTree(json_subfolder, path, ignore, database, session, coursename)
def mkdir(directory):
if not os.path.exists(directory):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment