Commit 1fb9840a authored by aal's avatar aal

unbreakify (but still broken)

parent ac28793c
@@ -20,6 +20,14 @@ BASEURL = 'https://studip.uni-hannover.de/api.php'
LOGIN_URL = "https://studip.uni-hannover.de/Shibboleth.sso/Login?target=https%3A%2F%2Fstudip.uni-hannover.de%2Findex.php%3Fagain%3Dyes%26sso%3Dshib"
SAML_URL = "https://studip.uni-hannover.de/Shibboleth.sso/SAML2/POST"
def main():
cookie = sso_login('user', 'password')
with requests.session() as session:
session.cookies = requests.utils.cookiejar_from_dict(cookie)
scrape(session)
# beautiful POS that returns the Seminar_Session cookie necessary for accessing the API
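# rough sketch of the handshake (assuming the standard Shibboleth POST-binding flow and stable
# field names in the IdP form): GET LOGIN_URL to reach the IdP login form, POST the credentials,
# pull the hidden SAMLResponse/RelayState inputs out of the reply, then POST them to SAML_URL so
# the SP sets the Seminar_Session cookie consumed by main() above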
def sso_login(username, password):
# Start session and get login form.
with requests.session() as session:
@@ -32,68 +40,67 @@ def sso_login(username, password):
hidden_inputs = SAML_html.xpath(r'//form//input[@type="hidden"]')
form2 = {x.attrib["name"]: x.attrib["value"] for x in hidden_inputs}
response2 = session.post(SAML_URL, data=form2)
print(session.cookies)
# print(response2.text)
print(session.get(BASEURL + '/user').text)
def download():
with requests.Session() as session:
session.auth = (user, password)
# json_user = session.get(BASEURL + '/user').json()
print(session.get(BASEURL + '/user').request.headers)
# json_semesters = session.get(BASEURL + '/semesters', params='limit=1000').json()
# json_courses = session.get(BASEURL + '/user/' + json_user['user_id'] + '/courses', params='limit=1000').json()
#
# if not allsemesters:
# latest = None
# unixnow = int(time.time())
# for key, semester in json_semesters['collection'].items():
# if unixnow >= semester['begin'] and unixnow <= semester['end']:
# latest = key
# break
# if not latest:
# print('Aktuelles Semester nicht gefunden')
# exit(1)
#
# if os.path.isfile(path + '/.database.json'):
# with open(path + '/.database.json', 'r') as databasefile:
# database = json.load(databasefile)
# else:
# database = {}
#
# for key, course in json_courses['collection'].items():
# if not allsemesters and course['start_semester'] != latest:
# continue
# if course['title'] in blacklist:
# continue
# print('=== ' + course['title'] + ' ===')
# mkdir(path + '/' + course['title'].replace('/', '_'))
# json_files = session.get(BASEURL + '/course/' + course['course_id'] + '/files', params='limit=1000').json()
# for key, file in json_files['collection'].items():
# if not file['documents']:
# continue
# for key, document in file['documents'].items():
# if re.match(ignore, document['filename']):
# continue
# date = ''
# if document['file_id'] in database:
# if database[document['file_id']] == document['chdate']:
# continue
# else:
# date = document['chdate']
# database[document['file_id']] = document['chdate']
# print(document['filename'] + str(date))
# target = path + '/' + course['title'].replace('/', '_') + '/' + document['filename'] + str(date)
# download = session.get(BASEURL + '/file/' + document['file_id'] + '/content', stream=True)
# with open(target, 'wb') as downloadfile:
# download.raw.decode_content = True
# shutil.copyfileobj(download.raw, downloadfile)
#
# with open(path + '/.database.json', 'w') as databasefile:
# json.dump(database, databasefile, indent=4)
#
#
return dict(Seminar_Session=session.cookies['Seminar_Session'])
def scrape(session):
    allsemesters = False  # only look at the currently running semester
    ignore = None         # optional regex of filenames to skip (not wired up yet)
    path = './Test'       # download target directory
    blacklist = set()     # course titles to skip
###
json_user = session.get(BASEURL + '/user').json()
json_semesters = session.get(BASEURL + '/semesters', params='limit=1000').json()
json_courses = session.get(BASEURL + '/user/' + json_user['user_id'] + '/courses', params='limit=1000').json()
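    # unless allsemesters is set, narrow the course list to the semester whose
    # begin/end interval contains the current unix time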
if not allsemesters:
latest = None
unixnow = int(time.time())
for key, semester in json_semesters['collection'].items():
        if semester['begin'] <= unixnow <= semester['end']:
latest = semester['id']
json_courses = session.get(BASEURL + '/user/' + json_user['user_id'] + '/courses?semester=' + latest, params='limit=1000').json()
break
if not latest:
print('Aktuelles Semester nicht gefunden')
exit(1)
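    # .database.json remembers the chdate last seen per file id, so files that
    # have not changed since the previous run are skipped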
if os.path.isfile(path + '/.database.json'):
with open(path + '/.database.json', 'r') as databasefile:
database = json.load(databasefile)
else:
database = {}
for key, course in json_courses['collection'].items():
if course['title'] in blacklist:
continue
print('=== ' + course['title'] + ' ===')
mkdir(path + '/' + course['title'].replace('/', '_'))
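        # walk the course's top_folder: each subfolder is fetched separately and
        # exposes its documents as file_refs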
json_folders = session.get(BASEURL + '/course/' + course['course_id'] + '/top_folder', params='limit=1000').json()
for folder in json_folders['subfolders']:
json_files = session.get(BASEURL + '/folder/' + folder['id']).json()
for document in json_files['file_refs']:
#if re.match(ignore, document['name']):
# continue
date = ''
if document['file_id'] in database:
if database[document['file_id']] == document['chdate']:
continue
else:
date = document['chdate']
database[document['file_id']] = document['chdate']
print(document['name'] + str(date))
target = path + '/' + course['title'].replace('/', '_') + '/' + document['name'] + str(date)
download = session.get(BASEURL + '/file/' + document['id'] + '/download', stream=True)
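                    # stream the response body straight to disk instead of holding the whole file in memory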
with open(target, 'wb') as downloadfile:
download.raw.decode_content = True
shutil.copyfileobj(download.raw, downloadfile)
with open(path + '/.database.json', 'w') as databasefile:
json.dump(database, databasefile, indent=4)
def mkdir(directory):
if not os.path.exists(directory):
os.makedirs(directory)
main()