Skip to content

Commit 18f26c5

Browse files
authored
Merge pull request #181 from Harshitmishra001/main
Update scrapping.py
2 parents 947c303 + 8f273fd commit 18f26c5

File tree

1 file changed

+30
-43
lines changed

1 file changed

+30
-43
lines changed

amazon_scrapping/scrapping.py

Lines changed: 30 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,46 @@
1-
# product name
21
from selenium import webdriver
32
from selenium.webdriver.common.by import By
43
from selenium.webdriver.support.ui import WebDriverWait
54
from selenium.webdriver.support import expected_conditions as EC
6-
import time
7-
import json
8-
import csv
95
import pandas as pd
106

11-
## One way to load chrome webdirver
12-
#from webdriver_manager.chrome import ChromeDriverManager
13-
#driver = webdriver.Chrome(ChromeDriverManager().install())
14-
15-
## another way to load chrome webdriver
16-
import os

# Path to the ChromeDriver binary. A hard-coded per-developer path breaks on
# every other machine, so allow an override via CHROMEDRIVER_PATH while
# keeping the original location as the default for backward compatibility.
path = os.environ.get('CHROMEDRIVER_PATH', '/Users/hmharsh/Downloads/chromedriver')

# NOTE(review): passing the path positionally is deprecated in Selenium 4
# (use Service(path)) — confirm the pinned selenium version before changing.
driver = webdriver.Chrome(path)
189

1910
def product_listing(txt):
    """Search Amazon.in for *txt* and return the product titles found.

    Types the query into the search box, submits it, then walks every
    result page by clicking the "next" pagination button until it is
    disabled or missing.

    Args:
        txt: Search query typed into the Amazon search box.

    Returns:
        list[str]: Product names collected across all result pages.
    """
    name_list = []
    driver.get("https://www.amazon.in/")
    # Explicit wait for the search box — more reliable than implicit waits.
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, 'twotabsearchtextbox')))
    search_box = driver.find_element(By.ID, 'twotabsearchtextbox')
    search_box.clear()
    search_box.send_keys(txt)
    search_button = driver.find_element(By.ID, 'nav-search-submit-button')
    search_button.click()

    while True:
        # Product titles live in this span class on the results page.
        title_xpath = '//span[@class="a-size-medium a-color-base a-text-normal"]'
        WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.XPATH, title_xpath)))
        items = driver.find_elements(By.XPATH, title_xpath)
        name_list.extend(item.text for item in items)

        # Stop when the "next" button is disabled or absent (last page).
        # Fix: a bare `except:` also swallowed KeyboardInterrupt/SystemExit;
        # catching Exception keeps the best-effort pagination behavior while
        # letting interpreter-exit signals propagate.
        try:
            next_button = driver.find_element(By.CLASS_NAME, 's-pagination-next')
            if 's-pagination-disabled' in next_button.get_attribute('class'):
                break
            next_button.click()
        except Exception:
            break
    return name_list
5034

5135
names = ['Laptop', 'Phones', 'Printers', 'Desktops', 'Monitors', 'Mouse',
         'Pendrive', 'Earphones', 'Smart TV', 'Power banks']

all_product_listings = []
try:
    # Collect listings for every category into one flat list.
    for name in names:
        all_product_listings.extend(product_listing(name))
finally:
    # Fix: quit() previously ran only on the success path, leaking the
    # browser process if any category's scrape raised. Always release it.
    driver.quit()

# Convert the collected names to a single-column DataFrame and save as CSV.
df = pd.DataFrame(all_product_listings, columns=['Product Name'])
df.to_csv('./prod_listings.csv', index=False)
print(df)

0 commit comments

Comments
 (0)