This repository was archived by the owner on Jun 17, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathreddit_saved_links.py
More file actions
70 lines (57 loc) · 1.99 KB
/
reddit_saved_links.py
File metadata and controls
70 lines (57 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 20 21:42:42 2017
@author: Desmonduz
"""
import json

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Reddit credentials typed into the login form below; replace the
# placeholders before running the script.
username = "your_username"
pwd = "your_pwd"

driver = webdriver.Chrome()
# Load the front page over HTTPS (the saved-links URL used later in this
# script is HTTPS as well) so the credentials are never typed into a page
# that was fetched in cleartext.
driver.get("https://www.reddit.com")

# Fill in the form fields named "user" and "passwd", then submit the form
# through the password field.
user_field = driver.find_element(By.NAME, "user")
user_field.clear()
user_field.send_keys(username)
password_field = driver.find_element(By.NAME, "passwd")
password_field.clear()
password_field.send_keys(pwd)
password_field.submit()

# Accumulates the link dicts gathered from every page of saved items.
results = []
def collect_results():
    """Scrape every entry on the page currently loaded in ``driver``.

    Each ``div.entry`` element becomes a dict with the keys ``title``,
    ``url``, ``sub`` and ``date``. An entry is first read as a link post
    (``a.title``, ``a.subreddit`` and ``time`` children). If any of those
    children is missing it is read as a saved comment instead: the text
    comes from ``form.usertext``, the URL from the "permalink" anchor, and
    ``sub`` is set to the literal ``"user comment"``.

    Returns:
        list: one dict per entry, in page order; empty when the page has no
        ``div.entry`` elements.

    Raises:
        NoSuchElementException: if an entry matches neither layout.
    """
    links = []
    for entry in driver.find_elements(By.CSS_SELECTOR, "div.entry"):
        # Parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print(entry.text)
        link = {}
        try:
            # Link post: look the title anchor up once and reuse it.
            title_anchor = entry.find_element(By.CSS_SELECTOR, "a.title")
            link["title"] = title_anchor.text
            link["url"] = title_anchor.get_attribute('href')
            link["sub"] = entry.find_element(By.CSS_SELECTOR, "a.subreddit").text
            link["date"] = entry.find_element(By.CSS_SELECTOR, "time").get_attribute('datetime')
        except NoSuchElementException:
            # Only a missing child element triggers the comment fallback;
            # unrelated errors (and Ctrl-C) are no longer swallowed.
            link["title"] = entry.find_element(By.CSS_SELECTOR, "form.usertext").text
            link["url"] = entry.find_element(By.LINK_TEXT, "permalink").get_attribute('href')
            link["sub"] = "user comment"
            link["date"] = entry.find_element(By.CSS_SELECTOR, "time").get_attribute('datetime')
        links.append(link)
    return links
try:
    # Wait up to 10 seconds for the "logout" link to be present, which
    # signals that the login submitted earlier went through.
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.LINK_TEXT, "logout"))
    )
    driver.get("https://www.reddit.com/user/%s/saved" % username)
    links = collect_results()
    while links:
        results += links
        # find_elements returns an empty list instead of raising when the
        # link is absent, so the last page ends the loop cleanly rather
        # than aborting the script with NoSuchElementException.
        next_links = driver.find_elements(By.LINK_TEXT, 'next ›')
        if not next_links:
            break
        next_links[0].click()
        links = collect_results()
finally:
    try:
        # quit() ends the whole WebDriver session; close() would only close
        # the current window.
        driver.quit()
    finally:
        # Written from the cleanup path so that whatever was collected
        # before a failure is still saved.
        if results:
            with open('saved_links.json', 'w') as f:
                json.dump(results, f)