Skip to content

Commit 02dc6e8

Browse files
committed
Trying to solve build error -1
1 parent 536cbdc commit 02dc6e8

File tree

1 file changed

+12
-13
lines changed

1 file changed

+12
-13
lines changed

scripts/Web_Scrapper/web_scraper.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,28 +6,27 @@
66

77
query = input("Enter what you want to search : ")
88
query = urllib.parse.quote_plus(query) # Format into URL encoding
9-
number_result = 50 # Give result upto 50
9+
number_result = 50 # Give result upto 50
1010
ua = UserAgent()
11-
google_url = "https://www.google.com/search?q=" + \
12-
query + "&num=" + str(number_result)
11+
google_url = "https://www.google.com/search?q=" + query + "&num=" + str(number_result)
1312
response = requests.get(google_url, {"User-Agent": ua.random})
1413
soup = BeautifulSoup(response.text, "html.parser")
1514

16-
result_div = soup.find_all('div', attrs={'class': 'ZINbbc'})
15+
result_div = soup.find_all("div", attrs={"class": "ZINbbc"})
1716

18-
links = [] #Links to results
19-
titles = [] #Title of results
20-
descriptions = [] #Description about result
17+
links = [] # Links to results
18+
titles = [] # Title of results
19+
descriptions = [] # Description about result
2120
for r in result_div:
2221
# Checks if each element is present, else, raise exception
2322
try:
24-
link = r.find('a', href=True)
25-
title = r.find('div', attrs={'class': 'vvjwJb'}).get_text()
26-
description = r.find('div', attrs={'class': 's3v9rd'}).get_text()
23+
link = r.find("a", href=True)
24+
title = r.find("div", attrs={"class": "vvjwJb"}).get_text()
25+
description = r.find("div", attrs={"class": "s3v9rd"}).get_text()
2726

2827
# Check to make sure everything is present before appending
29-
if link != '' and title != '' and description != '':
30-
links.append(link['href'])
28+
if link != "" and title != "" and description != "":
29+
links.append(link["href"])
3130
titles.append(title)
3231
descriptions.append(description)
3332
# Next loop if one element is not present
@@ -37,7 +36,7 @@
3736
to_remove = []
3837
clean_links = []
3938
for i, l in enumerate(links):
40-
clean = re.search('\/url\?q\=(.*)\&sa', l)
39+
clean = re.search("\/url\?q\=(.*)\&sa", l)
4140

4241
# Anything that doesn't fit the above pattern will be removed
4342
if clean is None:

0 commit comments

Comments (0)