6 | 6 |
7 | 7 | query = input("Enter what you want to search: ")
8 | 8 | query = urllib.parse.quote_plus(query)  # URL-encode the query
9 | | -number_result = 50 # Give result upto 50 |
| 9 | +number_result = 50  # Request up to 50 results
10 | 10 | ua = UserAgent() |
11 | | -google_url = "https://www.google.com/search?q=" + \ |
12 | | - query + "&num=" + str(number_result) |
| 11 | +google_url = f"https://www.google.com/search?q={query}&num={number_result}"
13 | 12 | response = requests.get(google_url, headers={"User-Agent": ua.random})
14 | 13 | soup = BeautifulSoup(response.text, "html.parser") |
15 | 14 |
16 | | -result_div = soup.find_all('div', attrs={'class': 'ZINbbc'}) |
| 15 | +result_div = soup.find_all("div", attrs={"class": "ZINbbc"}) |
17 | 16 |
18 | | -links = [] #Links to results |
19 | | -titles = [] #Title of results |
20 | | -descriptions = [] #Description about result |
| 17 | +links = []  # Links to results
| 18 | +titles = []  # Titles of results
| 19 | +descriptions = []  # Descriptions of results
21 | 20 | for r in result_div: |
22 | 21 | # Check that each element is present; a missing one raises and skips the result
23 | 22 | try: |
24 | | - link = r.find('a', href=True) |
25 | | - title = r.find('div', attrs={'class': 'vvjwJb'}).get_text() |
26 | | - description = r.find('div', attrs={'class': 's3v9rd'}).get_text() |
| 23 | + link = r.find("a", href=True) |
| 24 | + title = r.find("div", attrs={"class": "vvjwJb"}).get_text() |
| 25 | + description = r.find("div", attrs={"class": "s3v9rd"}).get_text() |
27 | 26 |
28 | 27 | # Check to make sure everything is present before appending |
29 | | - if link != '' and title != '' and description != '': |
30 | | - links.append(link['href']) |
| 28 | + if link and title and description:
| 29 | + links.append(link["href"]) |
31 | 30 | titles.append(title) |
32 | 31 | descriptions.append(description) |
33 | 32 | # Skip to the next result if an element is missing
37 | 36 | to_remove = [] |
38 | 37 | clean_links = [] |
39 | 38 | for i, l in enumerate(links): |
40 | | - clean = re.search('\/url\?q\=(.*)\&sa', l) |
| 39 | + clean = re.search(r"/url\?q=(.*)&sa", l)
41 | 40 |
42 | 41 | # Anything that doesn't fit the above pattern will be removed |
43 | 42 | if clean is None: |
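
For reference, a minimal sketch of the URL-encoding step on line 8 of the diff; the query string here is made up. quote_plus replaces spaces with "+" and percent-encodes reserved characters:

import urllib.parse

# Hypothetical query; quote_plus swaps spaces for "+" and
# percent-encodes characters such as "&" that are unsafe in a URL query.
query = urllib.parse.quote_plus("best python tutorials & guides")
print(query)  # best+python+tutorials+%26+guides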
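
Likewise, a sketch of the link-cleaning step at the end of the diff, assuming Google wraps each result as a /url?q=<target>&sa=... redirect; the sample link is invented for illustration:

import re

# Invented example of a redirect-style result link.
sample = "/url?q=https://example.com/page&sa=U&ved=abc123"
match = re.search(r"/url\?q=(.*)&sa", sample)
if match:
    print(match.group(1))  # https://example.com/page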