import re
import urllib.parse

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent

query = input("Enter what you want to search : ")
query = urllib.parse.quote_plus(query)  # URL-encode the query string
number_result = 50  # Request up to 50 results
ua = UserAgent()
google_url = "https://www.google.com/search?q=" + query + "&num=" + str(number_result)
# The User-Agent must be passed via the headers keyword argument; the second
# positional argument of requests.get is params, not headers
response = requests.get(google_url, headers={"User-Agent": ua.random})
soup = BeautifulSoup(response.text, "html.parser")

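# Note: these class names (ZINbbc, vvjwJb, s3v9rd) come from Google's
# no-JavaScript HTML and change often, and automated queries may be
# rate-limited, so treat the selectors below as a best-effort snapshot.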
result_div = soup.find_all("div", attrs={"class": "ZINbbc"})

links = []  # Links to results
titles = []  # Titles of results
descriptions = []  # Descriptions of results
for r in result_div:
    # find() returns None for a missing element, so get_text() raises
    # AttributeError and the block is skipped
    try:
        link = r.find("a", href=True)
        title = r.find("div", attrs={"class": "vvjwJb"}).get_text()
        description = r.find("div", attrs={"class": "s3v9rd"}).get_text()

        # Check that everything is present before appending; link is a Tag
        # (or None), so test truthiness rather than equality with ""
        if link and title and description:
            links.append(link["href"])
            titles.append(title)
            descriptions.append(description)
    # Move on to the next result if an element is missing
    except AttributeError:
        continue

to_remove = []
clean_links = []
for i, l in enumerate(links):
    # Google wraps each result URL as /url?q=<target>&sa=...; capture <target>
    clean = re.search(r"/url\?q=(.*)&sa", l)

    # Anything that doesn't fit the above pattern will be removed
    if clean is None:
        to_remove.append(i)
        continue
    clean_links.append(clean.group(1))
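
# Alternative sketch (an addition, not part of the original script): the q
# parameter can also be pulled out with the standard library instead of a
# regex, which copes with reordered query parameters, e.g.
#   urllib.parse.parse_qs(urllib.parse.urlparse(l).query).get("q", [""])[0]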

# Remove the corresponding titles & descriptions; delete from the end so
# earlier deletions don't shift the indices still queued for removal
for x in reversed(to_remove):
    del titles[x]
    del descriptions[x]

for i in range(len(clean_links)):
    print(titles[i])
    print(descriptions[i])
    print(clean_links[i])
    print()
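
# Follow-up sketch (an addition, not part of the original script): the captured
# URLs may still contain percent-escapes such as %3F, which unquote decodes
decoded_links = [urllib.parse.unquote(link) for link in clean_links]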