-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathZIP_demographics_scraping.py
More file actions
54 lines (42 loc) · 1.85 KB
/
ZIP_demographics_scraping.py
File metadata and controls
54 lines (42 loc) · 1.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#! python3
# A Python script that scrapes ZIP code demographic attributes from a JavaScript-rendered website
# using the selenium module and writes the scraped results into an Excel spreadsheet.
# Because the script is written to be used with the Firefox browser,
# it is necessary to have Firefox installed to run the script.
import os

import openpyxl
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# ---------------------------------------------------------------------------
# Configuration: replace the placeholders before running the script.
# ---------------------------------------------------------------------------
WORKBOOK_PATH = 'replace with directory'  # workbook that lists the ZIP codes
SHEET_NAME = 'replace with sheet'         # worksheet inside that workbook
OUTPUT_DIR = 'replace with directory'     # directory that receives the results
OUTPUT_FILE = 'scraped_data.xlsx'         # name of the workbook written at the end

# NOTE(review): the sheet layout is assumed, not verified -- ZIP codes in
# column A, one header row, data starting at row 2. Results are written to
# columns B (income), C (age) and D (population density); the site URL is
# read from cell E2. Confirm against the real workbook.
ZIP_COLUMN = 'A'
FIRST_DATA_ROW = 2

# Seconds Selenium keeps polling for an element before giving up; the target
# page is rendered by JavaScript, so elements may appear after the page load.
ELEMENT_WAIT_SECONDS = 10

# Open the workbook holding the list of ZIP codes and pick the worksheet.
ZIPcodes = openpyxl.load_workbook(WORKBOOK_PATH)
ZIPsheet = ZIPcodes[SHEET_NAME]

# The URL of the demographics site is stored in the sheet itself.
URL = ZIPsheet['E2'].value
print(URL)
if not URL:
    # Fail before launching a browser when there is nothing to open.
    raise ValueError('Cell E2 of the worksheet must contain the URL to scrape')

browser = webdriver.Firefox()
try:
    browser.implicitly_wait(ELEMENT_WAIT_SECONDS)
    browser.get(URL)

    for row in range(FIRST_DATA_ROW, ZIPsheet.max_row + 1):
        zip_code = ZIPsheet[ZIP_COLUMN + str(row)].value
        if zip_code is None:
            # Blank cell: nothing to look up for this row.
            continue

        # Look the input up on every pass so a re-rendered page cannot leave
        # us holding a stale element, and clear whatever the last pass typed.
        # NOTE(review): ZIP codes stored as numbers lose leading zeros
        # (e.g. 02134 -> 2134); store them as text in the workbook if needed.
        textInput = browser.find_element(By.ID, 'zipCodeInput')
        textInput.clear()
        textInput.send_keys(str(zip_code))
        textInput.send_keys(Keys.ENTER)

        # NOTE(review): the implicit wait only guarantees the elements exist;
        # it cannot tell whether the charts already show the new ZIP code.
        # Add an explicit wait if stale values turn up in the output.

        # Income tab -> column B
        browser.find_element(By.LINK_TEXT, 'Income').click()
        incomeValue = browser.find_element(By.CLASS_NAME, 'chart-value')
        ZIPsheet['B' + str(row)].value = incomeValue.text

        # Age tab -> column C
        browser.find_element(By.LINK_TEXT, 'Age').click()
        AgeValue = browser.find_element(By.CSS_SELECTOR, '#age div.chart-value')
        ZIPsheet['C' + str(row)].value = AgeValue.text

        # Population Density tab -> column D
        browser.find_element(By.LINK_TEXT, 'Population Density').click()
        densityValue = browser.find_element(
            By.CSS_SELECTOR, '#population div.chart-value')
        ZIPsheet['D' + str(row)].value = densityValue.text
finally:
    # Always shut Firefox down, even when a lookup fails part-way through.
    browser.quit()

# Save into a new file; building the path avoids changing the working directory.
ZIPcodes.save(os.path.join(OUTPUT_DIR, OUTPUT_FILE))