Skip to content

Commit f8a5cec

Browse files
committed
feat: modify input for NorthumberlandCouncil to accept uprn instead of house number, and use new page structure
1 parent c2dbfd8 commit f8a5cec

File tree

2 files changed

+71
-86
lines changed

2 files changed

+71
-86
lines changed

uk_bin_collection/tests/input.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1779,13 +1779,13 @@
17791779
"LAD24CD": "E06000065"
17801780
},
17811781
"NorthumberlandCouncil": {
1782-
"house_number": "22",
1782+
"uprn": "10093091235",
17831783
"postcode": "NE46 1UQ",
17841784
"skip_get_url": true,
1785-
"url": "https://www.northumberland.gov.uk/Waste/Household-waste/Household-bin-collections/Bin-Calendars.aspx",
1785+
"url": "https://bincollection.northumberland.gov.uk/postcode",
17861786
"web_driver": "http://selenium:4444",
17871787
"wiki_name": "Northumberland",
1788-
"wiki_note": "Pass the house number and postcode in their respective parameters. This parser requires a Selenium webdriver.",
1788+
"wiki_note": "Pass the postcode and the UPRN. You can find your UPRN using [FindMyAddress](https://www.findmyaddress.co.uk/search). This parser requires a Selenium webdriver.",
17891789
"LAD24CD": "E06000057"
17901790
},
17911791
"NorwichCityCouncil": {
@@ -2805,4 +2805,4 @@
28052805
"wiki_note": "Provide your UPRN.",
28062806
"LAD24CD": "E06000014"
28072807
}
2808-
}
2808+
}

uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py

Lines changed: 67 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
import time
2+
import datetime
23

4+
from datetime import datetime
35
from bs4 import BeautifulSoup
46
from selenium.common.exceptions import TimeoutException
57
from selenium.webdriver.common.by import By
8+
from selenium.webdriver.common.keys import Keys
69
from selenium.webdriver.support import expected_conditions as EC
7-
from selenium.webdriver.support.ui import WebDriverWait
10+
from selenium.webdriver.support.ui import Select, WebDriverWait
811

912
from uk_bin_collection.uk_bin_collection.common import *
1013
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
1114

12-
# import the wonderful Beautiful Soup and the URL grabber
13-
14-
1515
class CouncilClass(AbstractGetBinDataClass):
1616
"""
1717
Concrete classes have to implement all abstract operations of the
@@ -30,16 +30,18 @@ def extract_styles(self, style_str: str) -> dict:
3030
def parse_data(self, page: str, **kwargs) -> dict:
3131
driver = None
3232
try:
33-
page = "https://www.northumberland.gov.uk/Waste/Household-waste/Household-bin-collections/Bin-Calendars.aspx"
33+
page = "https://bincollection.northumberland.gov.uk/postcode"
3434

3535
data = {"bins": []}
3636

37-
user_paon = kwargs.get("paon")
3837
user_postcode = kwargs.get("postcode")
38+
user_uprn = kwargs.get("uprn")
39+
40+
check_postcode(user_postcode)
41+
check_uprn(user_uprn)
42+
3943
web_driver = kwargs.get("web_driver")
4044
headless = kwargs.get("headless")
41-
check_paon(user_paon)
42-
check_postcode(user_postcode)
4345

4446
# Create Selenium webdriver
4547
driver = create_webdriver(web_driver, headless, None, __name__)
@@ -49,106 +51,89 @@ def parse_data(self, page: str, **kwargs) -> dict:
4951
wait = WebDriverWait(driver, 20)
5052

5153
# Wait for and click cookie button
52-
cookie_button = wait.until(
53-
EC.element_to_be_clickable((By.ID, "ccc-notify-accept"))
54-
)
55-
cookie_button.click()
56-
57-
# Wait for and find house number input
58-
inputElement_hn = wait.until(
59-
EC.presence_of_element_located(
60-
(
61-
By.ID,
62-
"p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtHouse",
63-
)
54+
try:
55+
cookie_button = wait.until(
56+
EC.element_to_be_clickable((By.CLASS_NAME, "accept-all"))
6457
)
65-
)
58+
cookie_button.click()
59+
except TimeoutException:
60+
print("Cookie banner not found, continuing...")
6661

6762
# Wait for and find postcode input
6863
inputElement_pc = wait.until(
6964
EC.presence_of_element_located(
70-
(
71-
By.ID,
72-
"p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtPostcode",
73-
)
65+
(By.ID, "postcode")
7466
)
7567
)
7668

77-
# Enter details
69+
# Enter postcode and submit
7870
inputElement_pc.send_keys(user_postcode)
79-
inputElement_hn.send_keys(user_paon)
71+
inputElement_pc.send_keys(Keys.ENTER)
72+
73+
# Wait for and find the address dropdown (options are selected by UPRN)
74+
selectElement_address = wait.until(
75+
EC.presence_of_element_located(
76+
(By.ID, "address")
77+
)
78+
)
79+
80+
dropdown = Select(selectElement_address)
81+
dropdown.select_by_value(user_uprn)
8082

81-
# Click lookup button and wait for results
82-
lookup_button = wait.until(
83+
# Click submit button and wait for results
84+
submit_button = wait.until(
8385
EC.element_to_be_clickable(
84-
(
85-
By.ID,
86-
"p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_butLookup",
87-
)
86+
(By.CLASS_NAME, "govuk-button")
8887
)
8988
)
90-
lookup_button.click()
89+
submit_button.click()
9190

9291
# Wait for results to load
9392
route_summary = wait.until(
9493
EC.presence_of_element_located(
95-
(
96-
By.ID,
97-
"p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary",
98-
)
94+
(By.CLASS_NAME, "govuk-table")
9995
)
10096
)
10197

98+
now = datetime.now()
99+
current_month = now.month
100+
current_year = now.year
101+
102102
# Get page source after everything has loaded
103103
soup = BeautifulSoup(driver.page_source, features="html.parser")
104104

105-
# Work out which bins can be collected for this address. Glass bins are only on some houses due to pilot programme.
106-
bins_collected = list(
107-
map(
108-
str.strip,
109-
soup.find(
110-
"span",
111-
id="p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary",
112-
)
113-
.text.replace("Routes found: ", "")
114-
.split(","),
105+
# From the table, find all rows:
106+
# - cell 1 is the date in the format e.g. "9 September" (so no year value 🥲)
107+
# - cell 2 is the day name, not useful
108+
# - cell 3 is the bin type eg. "General waste", "Recycling", "Garden waste"
109+
rows = soup.find_all("tr", class_="govuk-table__row")
110+
111+
for row in rows:
112+
bin_type=row.find_all("td")[-1].text.strip()
113+
114+
collection_date_string = row.find('th').text.strip()
115+
116+
# sometimes but not always the day is written "22nd" instead of 22 so make sure we get a proper int
117+
collection_date_day = [int(i) for i in collection_date_string.split(' ').split() if i.isdigit()]
118+
collection_date_month_name = collection_date_string.split(' ')[1]
119+
120+
# if we are currently in Oct, Nov, or Dec and the collection month is Jan, Feb, or Mar, let's assume it's next year
121+
if (current_month >= 10) and (collection_date_month_name in ["January", "February", "March"]):
122+
collection_date_year = current_year + 1
123+
else:
124+
collection_date_year = current_year
125+
126+
collection_date = time.strptime(
127+
f"{collection_date_day[0]} {collection_date_month_name} {collection_date_year}", "%d %B %Y"
115128
)
116-
)
117129

118-
# Get the background colour for each of them...
119-
bins_by_colours = dict()
120-
for bin in bins_collected:
121-
if "(but no dates found)" in bin:
122-
continue
123-
style_str = soup.find("span", string=bin)["style"]
124-
bin_colour = self.extract_styles(style_str)["background-color"].upper()
125-
bins_by_colours[bin_colour] = bin
126-
127-
# Work through the tables gathering the dates, if the cell has a background colour - match it to the bin type.
128-
calander_tables = soup.find_all("table", title="Calendar")
129-
for table in calander_tables:
130-
# Get month and year
131-
# First row in table is the header
132-
rows = table.find_all("tr")
133-
month_and_year = (
134-
rows[0].find("table", class_="calCtrlTitle").find("td").string
130+
# Add it to the data
131+
data["bins"].append(
132+
{
133+
"type": bin_type,
134+
"collectionDate": time.strftime(date_format, collection_date),
135+
}
135136
)
136-
bin_days = table.find_all("td", class_="calCtrlDay")
137-
for day in bin_days:
138-
day_styles = self.extract_styles(day["style"])
139-
if "background-color" in day_styles:
140-
colour = day_styles["background-color"].upper()
141-
date = time.strptime(
142-
f"{day.string} {month_and_year}", "%d %B %Y"
143-
)
144-
145-
# Add it to the data
146-
data["bins"].append(
147-
{
148-
"type": bins_by_colours[colour],
149-
"collectionDate": time.strftime(date_format, date),
150-
}
151-
)
152137
except Exception as e:
153138
# Here you can log the exception if needed
154139
print(f"An error occurred: {e}")

0 commit comments

Comments
 (0)