Skip to content

Commit be3efb8

Browse files
authored
Update scrapping.py
Enhanced the product listing that is exported to the CSV file (added prices and category columns).
1 parent 71705ab commit be3efb8

File tree

1 file changed

+24
-10
lines changed

1 file changed

+24
-10
lines changed

amazon_scrapping/scrapping.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@
44
from selenium.webdriver.support import expected_conditions as EC
55
import pandas as pd
66

# Build the browser session. Passing a ChromeOptions object (even an empty
# one) lets Selenium Manager resolve the matching chromedriver automatically,
# so no hard-coded driver path is needed.
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=options)
99

1010
def product_listing(txt):
1111
name_list = []
12+
price_list = []
1213
driver.get("https://www.amazon.in/")
1314
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'twotabsearchtextbox')))
1415
search_box = driver.find_element(By.ID, 'twotabsearchtextbox')
@@ -20,27 +21,40 @@ def product_listing(txt):
2021
while True:
2122
WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, '//span[@class="a-size-medium a-color-base a-text-normal"]')))
2223
items = driver.find_elements(By.XPATH, '//span[@class="a-size-medium a-color-base a-text-normal"]')
23-
for item in items:
24+
prices = driver.find_elements(By.XPATH, '//span[@class="a-price-whole"]')
25+
26+
for item, price in zip(items, prices):
2427
name_list.append(item.text)
28+
price_list.append(price.text if price else 'N/A')
2529

2630
try:
2731
next_button = driver.find_element(By.CLASS_NAME, 's-pagination-next')
2832
if 's-pagination-disabled' in next_button.get_attribute('class'):
2933
break
3034
next_button.click()
31-
except:
35+
except Exception as e:
36+
print(f"An error occurred: {e}")
3237
break
33-
return name_list
38+
return name_list, price_list
3439

40+
# List of search terms
names = ['Laptop', 'Phones', 'Printers', 'Desktops', 'Monitors', 'Mouse', 'Pendrive', 'Earphones', 'Smart TV', 'Power banks']

# Accumulators: one entry per scraped product, kept in lockstep so they
# line up column-wise in the output CSV.
all_product_listings = []
all_product_prices = []
category = []

try:
    # Scrape data for each search term
    for name in names:
        products, prices = product_listing(name)
        all_product_listings.extend(products)
        all_product_prices.extend(prices)
        # Repeat the search term once per product so every row carries its category.
        category.extend([name] * len(products))

    # Convert the data to a DataFrame and save it as a CSV file
    df = pd.DataFrame({'Category': category, 'Product Name': all_product_listings, 'Price': all_product_prices})
    output_file = './prod_listings.csv'
    df.to_csv(output_file, index=False)
    print(f"Data saved to {output_file}")
finally:
    # Quit the WebDriver even if scraping or saving raised, so a failed run
    # does not leak a live Chrome process.
    driver.quit()

0 commit comments

Comments
 (0)