|
1 |
| -# product name |
2 | 1 | from selenium import webdriver
|
3 | 2 | from selenium.webdriver.common.by import By
|
4 | 3 | from selenium.webdriver.support.ui import WebDriverWait
|
5 | 4 | from selenium.webdriver.support import expected_conditions as EC
|
6 |
| -import time |
7 |
| -import json |
8 |
| -import csv |
9 | 5 | import pandas as pd
|
10 | 6 |
|
11 |
| -## One way to load chrome webdriver |
12 |
| -#from webdriver_manager.chrome import ChromeDriverManager |
13 |
| -#driver = webdriver.Chrome(ChromeDriverManager().install()) |
14 |
| - |
15 |
| -## another way to load chrome webdriver |
16 |
# Imported next to its only use: the browser start-up below.
from selenium.webdriver.chrome.service import Service

# Location of the chromedriver binary on the local machine.
path = '/Users/hmharsh/Downloads/chromedriver'

# Selenium 4 takes the driver binary through a Service object; passing the
# path as the first positional argument is deprecated there and is no longer
# accepted by recent releases.
driver = webdriver.Chrome(service=Service(path))
|
18 | 9 |
|
def product_listing(txt):
    """Search amazon.in for ``txt`` and collect the product titles shown.

    Opens the Amazon India home page in the module-level ``driver``, submits
    ``txt`` through the search box, then walks the result pages by clicking
    the "next" pagination button until it is missing or disabled.

    Args:
        txt: Search phrase typed into the search box.

    Returns:
        A list with the text of every product-title element found, in page
        order. The list may be empty or partial if a results page never
        shows a matching title element, or if moving to the next page fails.

    Raises:
        selenium.common.exceptions.TimeoutException: If the search box does
            not appear within 10 seconds of loading the home page.
    """
    # Local import: base class of every Selenium error (timeouts, missing or
    # stale elements, intercepted clicks, ...).
    from selenium.common.exceptions import WebDriverException

    title_xpath = '//span[@class="a-size-medium a-color-base a-text-normal"]'
    name_list = []
    wait = WebDriverWait(driver, 10)

    driver.get("https://www.amazon.in/")

    # The wait returns the located element, so no second lookup is needed.
    search_box = wait.until(
        EC.presence_of_element_located((By.ID, 'twotabsearchtextbox'))
    )
    search_box.clear()
    search_box.send_keys(txt)
    driver.find_element(By.ID, 'nav-search-submit-button').click()

    while True:
        try:
            # Returns a non-empty list, or times out if no title ever appears.
            items = wait.until(
                EC.presence_of_all_elements_located((By.XPATH, title_xpath))
            )
            name_list.extend(item.text for item in items)

            next_button = driver.find_element(By.CLASS_NAME, 's-pagination-next')
            if 's-pagination-disabled' in (next_button.get_attribute('class') or ''):
                break
            next_button.click()

            # Wait until the old results are detached from the DOM. Otherwise
            # the presence check above can match the previous page again and
            # produce duplicates or stale elements.
            wait.until(EC.staleness_of(items[0]))
        except WebDriverException:
            # Best effort: any Selenium failure ends pagination and keeps
            # what was collected. Unlike a bare ``except``, this does not
            # swallow KeyboardInterrupt or programming errors.
            break
    return name_list
50 | 34 |
|
# Search terms to scrape, one Amazon search per entry.
names = ['Laptop', 'Phones', 'Printers', 'Desktops', 'Monitors', 'Mouse', 'Pendrive', 'Earphones', 'Smart TV', 'Power banks']
all_product_listings = []

try:
    for name in names:
        all_product_listings.extend(product_listing(name))
finally:
    # Always close the browser, even when a search fails part-way, so no
    # Chrome/chromedriver process is left running.
    driver.quit()

# Convert the list to a DataFrame and save it as a CSV file
df = pd.DataFrame(all_product_listings, columns=['Product Name'])
df.to_csv('./prod_listings.csv', index=False)
print(df)
0 commit comments