-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrape.py
More file actions
34 lines (31 loc) · 953 Bytes
/
scrape.py
File metadata and controls
34 lines (31 loc) · 953 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
try:
from googlesearch import search
except ImportError:
print("No module named 'google' found")
import ssl
import requests
from bs4 import BeautifulSoup
# Look up a headline on Google and, for every hit, record the page title
# (upper-cased), its Open Graph description and its URL in result.txt.

# Start each run with an empty result file so entries left over from an
# earlier run do not mix with the new ones.
with open("result.txt", "w", encoding="utf-8"):
    pass

# Headline to search for.
query = "Pope Francis shocks world, endorses Donald Trump for president"

# Collect the result URLs first, echoing each one as it arrives.
# (Named `urls` rather than `list` so the builtin is not shadowed.)
urls = []
for url in search(query, tld="co.in", num=5, stop=5, pause=2):
    urls.append(url)
    print(url)

for url in urls:
    # A single unreachable or slow site must not abort the whole run, so
    # bound the wait and skip the URL on any request-level failure.
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print("Skipping %s: %s" % (url, exc))
        continue

    soup = BeautifulSoup(response.text, "html.parser")

    # Pages may lack a <title> tag, or have one without a single text child
    # (in which case .string is None); fall back to a placeholder instead of
    # raising AttributeError.
    title_tag = soup.title
    if title_tag is not None and title_tag.string is not None:
        title = title_tag.string
    else:
        title = "No title given"
    print(title)

    # The og:description tag may be absent, or present without a `content`
    # attribute; both cases use the same placeholder.
    desc = soup.find("meta", property="og:description")
    description = desc.get("content") if desc is not None else None
    if description is None:
        description = "No meta desc given"
    print(description)

    # One record per URL: TITLE, description, URL, then a blank separator
    # line. Appending per URL keeps earlier records on disk if a later page
    # fails; UTF-8 is set explicitly so non-ASCII text does not depend on the
    # platform's default encoding.
    with open("result.txt", "a", encoding="utf-8") as f:
        f.write("\n".join([title.upper(), description, url]) + "\n\n")