Skip to content
Snippets Groups Projects
Commit 9ab985cf authored by Jan Kruse's avatar Jan Kruse
Browse files

Basic functionality

Initial commit with basic functionality
parents
Branches master
No related tags found
No related merge requests found
import requests
import argparse
import os
SUBREDDIT = ""  # NOTE(review): unused placeholder — the subreddit actually comes from argparse below; consider removing
def create_url_list(subreddit, its):
    """Collect post URLs from the newest listing pages of a subreddit.

    Parameters
    ----------
    subreddit : str
        Subreddit name without the ``/r/`` prefix.
    its : int
        Number of listing pages to fetch (Reddit returns up to 25 posts each).

    Returns
    -------
    list of str
        The ``url`` field of every post seen, newest first.
    """
    urllist = []
    # Reddit rejects the default requests User-Agent, so send a custom one.
    headers = {'User-agent': 'image-scraping'}
    after = ""
    for _ in range(its):
        r = requests.get('https://www.reddit.com/r/' + subreddit + '/new/.json',
                         headers=headers, params={'after': after}, timeout=30)
        # Parse the response once; the original re-parsed r.json() on every
        # inner iteration.
        data = r.json()['data']
        # Iterate over the posts actually returned: the last page may contain
        # fewer than 25 entries, which the original hard-coded range(0, 25)
        # would crash on with an IndexError.
        for child in data['children']:
            urllist.append(child['data']['url'])
        after = data['after']  # pagination cursor for the next request
    return urllist
def download_pics(lst, folder_prefix):
    """Download every image URL in *lst* into the directory *folder_prefix*.

    Only URLs whose last path component ends in jpg/jpeg/png (any case) are
    fetched; other links (galleries, videos, external pages) are skipped.

    Parameters
    ----------
    lst : iterable of str
        Candidate URLs, e.g. as produced by ``create_url_list``.
    folder_prefix : str
        Existing directory the files are written into.
    """
    for url in lst:
        print("Downloading: " + url)
        filename = url.split('/')[-1]
        # `ext` instead of the original `type`, which shadowed the builtin.
        # Lower-cased so uppercase extensions like .JPG are not silently skipped.
        ext = filename.split('.')[-1].lower()
        if ext in ('jpg', 'jpeg', 'png'):
            r = requests.get(url, timeout=30)
            with open(os.path.join(folder_prefix, filename), "wb") as filebuf:
                filebuf.write(r.content)
def download_subreddit_pics(subreddit, pages, dest_folder):
    """Download all images from *pages* listing pages of *subreddit*.

    Files are written into ``dest_folder/<subreddit>``, which is created if
    it does not exist.

    Parameters
    ----------
    subreddit : str
        Subreddit name without the ``/r/`` prefix.
    pages : int
        Number of 25-post listing pages to scrape.
    dest_folder : str
        Parent directory for the per-subreddit download folder.
    """
    whole_path = os.path.join(dest_folder, subreddit)
    # makedirs creates missing intermediate directories and, with
    # exist_ok=True, is race-free — unlike the original exists()+mkdir pair,
    # which also failed outright when dest_folder itself was missing.
    os.makedirs(whole_path, exist_ok=True)
    urls = create_url_list(subreddit, pages)
    download_pics(urls, whole_path)
parser = argparse.ArgumentParser(description='Downloads all pictures from a subreddit')
parser.add_argument('subreddit', metavar='SUBREDDIT', type=str, help='A subreddit that should be downloaded')
parser.add_argument('dest_folder', metavar='PATH', type=str, help='The destination folder where all the files should be downloaded into')
parser.add_argument('pages_count', metavar='AMOUNT_PAGES', type=int, help='The number of pages the script shall download')
args = parser.parse_args()
download_subreddit_pics(args.subreddit, args.pages_count, args.dest_folder)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment