Commit 6ae131f

Merge pull request #2567 from Juhibhojani/main
Zomato Scraper
2 parents 6432fdf + e4ea114 commit 6ae131f

File tree: 3 files changed, +48 −0 lines


Zomato Scraper/readme.md

Lines changed: 12 additions & 0 deletions
# Infinite Scroll Web Scraping

This Python script uses Selenium and BeautifulSoup to scrape the Zomato website through its infinite-scroll listing. It navigates to the Zomato page that lists cafes in Ahmedabad, India, and extracts the name, link, rating, cuisine, and rate for each cafe.

## Requirements

- Google Chrome (or another browser supported by Selenium)
- A ChromeDriver build compatible with your Chrome version
- The Python packages pinned in requirements.txt (beautifulsoup4, selenium)

## Working

The script scrolls down the Zomato listing page and prints the details of each cafe as new results load, until you interrupt it (e.g. with Ctrl + C).
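A rough sketch of that scroll-and-reparse loop is shown below. It is not the committed script (the real parsing and selectors live in zomato.py), and the two-second pause is an assumption about how long Zomato needs to load the next batch of results.

```python
# Minimal sketch of the infinite-scroll loop, assuming ChromeDriver is on PATH
# and that a 2-second pause is long enough for new results to render.
import time
from selenium import webdriver

driver = webdriver.Chrome()
driver.get("https://zomato.com/ahmedabad/restaurants/cafes?category=2")

try:
    while True:
        # Jump to the bottom of the page so Zomato loads more cafes,
        # then grab the updated HTML for BeautifulSoup to parse.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)
        html = driver.page_source
except KeyboardInterrupt:
    driver.quit()
```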

Zomato Scraper/requirements.txt

Lines changed: 2 additions & 0 deletions
beautifulsoup4==4.10.0
selenium==3.141.0

Zomato Scraper/zomato.py

Lines changed: 34 additions & 0 deletions
# Zomato infinite-scroll scraper: prints the link, name, rating, cuisine and
# rate for each cafe listed on the Ahmedabad cafes page.
import re
import time

from bs4 import BeautifulSoup
from selenium import webdriver

driver = webdriver.Chrome()
url = "https://zomato.com/ahmedabad/restaurants/cafes?category=2"
driver.get(url)

while True:
    # Parse whatever has been loaded into the page so far.
    soup = BeautifulSoup(driver.page_source, "html.parser")
    container = soup.find("div", {"id": "root"})
    if container is None:
        # The page has not rendered yet; wait and try again.
        time.sleep(2)
        continue

    # Listing sections use classes starting with "sc-1mo3ldo-0 sc-";
    # the first match does not hold cafe cards and is skipped.
    sections = container.find_all("div", class_=re.compile("sc-1mo3ldo-0 sc-"))
    for items in sections[1:]:
        first_child = items.find("div")
        if first_child is None:
            continue
        # Each direct child div of the section wraps one cafe card.
        for item in first_child.find_all("div", recursive=False):
            link = item.find("a", href=True)
            name = item.find("h4")
            rating = item.find("div", {"class": "sc-1q7bklc-1 cILgox"})
            cuisine = item.find("p")
            if not (link and name and cuisine):
                continue
            rate = cuisine.find_next_sibling()
            print(link["href"])
            print(name.text)
            if rating:
                print(rating.text)
            print(cuisine.text)
            if rate:
                print(rate.text)

    # Scroll to the bottom to trigger Zomato's infinite scroll, then wait
    # for the next batch of cafes to render before re-parsing the page.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)
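If ChromeDriver is not on your PATH, or you want the scraper to run without opening a browser window, the driver setup can be adjusted as sketched here. This is only a sketch against the pinned selenium==3.141.0 API; the chromedriver path is a placeholder, not part of the committed script.

# Sketch: explicit driver path and headless mode with Selenium 3.141.0.
from selenium import webdriver

options = webdriver.ChromeOptions()
options.add_argument("--headless")  # run Chrome without a visible window

driver = webdriver.Chrome(
    executable_path="/path/to/chromedriver",  # placeholder path, adjust locally
    options=options,
)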
