4
4
from selenium .webdriver .support import expected_conditions as EC
5
5
import pandas as pd
6
6
7
# Start the shared Chrome session used by the scraper below. The options
# object is left at its defaults and no explicit chromedriver path is passed.
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=options)
9
9
10
10
def product_listing(txt):
    """Collect product names and prices from Amazon India search result pages.

    Loads the amazon.in home page, waits for the search box, then walks the
    result pages until the "next" pagination control is disabled or cannot
    be used.

    Args:
        txt: The search term for this run. NOTE(review): the statements that
            consume it are elided from the diff view this code was recovered
            from (see the note in the body) -- confirm against the full file.

    Returns:
        A ``(name_list, price_list)`` tuple of equal-length lists. A product
        for which no price of its own is found gets ``'N/A'``.
    """
    title_xpath = '//span[@class="a-size-medium a-color-base a-text-normal"]'
    # Evaluated relative to a title element: the nearest price element that
    # comes after that title in document order.
    nearest_price_xpath = './following::span[@class="a-price-whole"][1]'

    name_list = []
    price_list = []
    driver.get("https://www.amazon.in/")
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'twotabsearchtextbox')))
    search_box = driver.find_element(By.ID, 'twotabsearchtextbox')
    # NOTE(review): the diff hunk header "@@ -20,27 +21,40 @@" sat here; the
    # source lines it elides are not visible (presumably they type `txt` into
    # `search_box` and submit the search). Restore them from the full file.

    while True:
        WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, title_xpath)))
        items = driver.find_elements(By.XPATH, title_xpath)

        # Pairing titles with a separately collected page-wide price list
        # shifts every later price as soon as one product has no price.
        # Instead, look up the nearest following price for each title.
        nearest = [item.find_elements(By.XPATH, nearest_price_xpath) for item in items]

        for item, own, upcoming in zip(items, nearest, nearest[1:] + [[]]):
            # If this title and the next one resolve to the same price
            # element, that price sits after the next title and therefore
            # belongs to a later product: this product has no price.
            # Assumes a product's price follows its title in document order.
            if own and (not upcoming or own[0] != upcoming[0]):
                price_text = own[0].text
            else:
                price_text = 'N/A'
            name_list.append(item.text)
            price_list.append(price_text)

        try:
            next_button = driver.find_element(By.CLASS_NAME, 's-pagination-next')
            if 's-pagination-disabled' in next_button.get_attribute('class'):
                break
            next_button.click()
        except Exception as e:
            # A missing or unclickable pagination control ends this search
            # term; the results gathered so far are still returned.
            print(f"An error occurred: { e } ")
            break
    return name_list, price_list
34
39
40
# List of search terms; each one is scraped as its own category.
names = ['Laptop', 'Phones', 'Printers', 'Desktops', 'Monitors', 'Mouse', 'Pendrive', 'Earphones', 'Smart TV', 'Power banks']
all_product_listings = []
all_product_prices = []
category = []

try:
    # Scrape data for each search term
    for name in names:
        products, prices = product_listing(name)
        all_product_listings.extend(products)
        all_product_prices.extend(prices)
        # One category label per scraped product keeps the three columns
        # the same length when the DataFrame is built.
        category.extend([name] * len(products))
finally:
    # Quit the WebDriver even when a search raises, so no browser session
    # is left running after a failed run.
    driver.quit()

# Convert the data to a DataFrame and save it as a CSV file
df = pd.DataFrame({'Category': category, 'Product Name': all_product_listings, 'Price': all_product_prices})
output_file = './prod_listings.csv'
df.to_csv(output_file, index=False)
print(f"Data saved to { output_file } ")
0 commit comments