-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathrcpScraper.py
More file actions
34 lines (26 loc) · 971 Bytes
/
rcpScraper.py
File metadata and controls
34 lines (26 loc) · 971 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from bs4 import BeautifulSoup
import requests, io
import drudgeScraper
def scrape():
    """Scrape RealClearPolitics front-page headlines into ``RCPheadlines.txt``.

    Downloads https://www.realclearpolitics.com/ and, for each link position
    0..24, takes the link at that position inside every ``<div class="story">``
    element. The stripped link text and its ``href`` are written to
    ``RCPheadlines.txt`` (truncated on every run) and also passed to
    ``drudgeScraper.writer``. Link position 18 is skipped.

    Scraping stops at the first position for which a story div has too few
    links; that position is printed to stdout.

    Returns:
        None.

    Raises:
        KeyError: If a selected link has no ``href`` attribute.
    """
    max_links = 25
    # NOTE(review): position 18 was excluded by the original code; the reason
    # is not documented -- confirm it is still the right entry to drop.
    skipped_index = 18

    response = requests.get("https://www.realclearpolitics.com/")
    soup = BeautifulSoup(response.content, 'html.parser')

    # The parsed tree is never modified, so collect each story div's links
    # once instead of re-querying the document on every pass.
    links_per_story = [
        div.find_all('a')
        for div in soup.find_all('div', {'class': 'story'})
    ]

    # The context manager guarantees the file is closed even when an
    # unexpected error (not just IndexError) escapes the loop.
    with open('RCPheadlines.txt', 'w+') as output_file:
        output_file.write('--------------\n--------------\nRCP Headlines\n--------------\n--------------\n\n')
        try:
            for index in range(max_links):
                for links in links_per_story:
                    anchor = links[index]
                    title = anchor.text.strip()
                    link = anchor.attrs['href']
                    if index != skipped_index:
                        output_file.write(title + '\n' + link + '\n\n')
                        drudgeScraper.writer(title, link)
        except IndexError:
            # Ran out of links before reaching max_links; report how far we got.
            print(index)
# Script entry point: run the scraper once, then terminate the interpreter.
if __name__ == "__main__":
    scrape()
    # The bare exit() helper is injected by the `site` module for interactive
    # use and may be missing (e.g. `python -S`); raising SystemExit directly
    # has the same effect without that dependency.
    raise SystemExit