
Commit 80c8b15

fix: East Herts
1 parent d14fdc0 commit 80c8b15

File tree

3 files changed: +91 additions, -177 deletions


uk_bin_collection/tests/input.json

Lines changed: 3 additions & 5 deletions

@@ -761,13 +761,11 @@
     },
     "EastHertsCouncil": {
         "LAD24CD": "E07000097",
-        "house_number": "1",
-        "postcode": "CM20 2FZ",
         "skip_get_url": true,
-        "url": "https://www.eastherts.gov.uk",
-        "web_driver": "http://selenium:4444",
+        "uprn": "10023088183",
+        "url": "https://east-herts.co.uk/api/services/",
         "wiki_name": "East Herts Council",
-        "wiki_note": "Pass the house number and postcode in their respective parameters."
+        "wiki_note": "Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search)."
     },
     "EastLindseyDistrictCouncil": {
         "house_number": "1",
Lines changed: 27 additions & 116 deletions

@@ -1,7 +1,6 @@
-from bs4 import BeautifulSoup
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.support.wait import WebDriverWait
+import json
+import requests
+from datetime import datetime

 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
@@ -15,116 +14,28 @@ class CouncilClass(AbstractGetBinDataClass):
     """

     def parse_data(self, page: str, **kwargs) -> dict:
-        # Get and check UPRN
-        driver = None
-        try:
-            user_postcode = kwargs.get("postcode")
-            user_paon = kwargs.get("paon")
-            check_paon(user_paon)
-            check_postcode(user_postcode)
-            web_driver = kwargs.get("web_driver")
-            headless = kwargs.get("headless")
-            bindata = {"bins": []}
-
-            API_URL = "https://uhte-wrp.whitespacews.com"
-
-            # Create Selenium webdriver
-            driver = create_webdriver(web_driver, headless, None, __name__)
-            driver.get(API_URL)
-
-            # Click Find my bin collection day button
-            collectionButton = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable((By.LINK_TEXT, "Find my bin collection day"))
-            )
-            collectionButton.click()
-
-            main_content = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located((By.ID, "main-content"))
-            )
-
-            # Wait for the property number field to appear then populate it
-            inputElement_number = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "address_name_number",
-                    )
-                )
-            )
-            inputElement_number.send_keys(user_paon)
-
-            # Wait for the postcode field to appear then populate it
-            inputElement_postcode = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "address_postcode",
-                    )
-                )
-            )
-            inputElement_postcode.send_keys(user_postcode)
-
-            # Click search button
-            continueButton = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "Submit",
-                    )
-                )
-            )
-            continueButton.click()
-
-            # Wait for the 'Search Results' to appear and select the first result
-            property = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.CSS_SELECTOR,
-                        "li.app-subnav__section-item a",
-                        # "app-subnav__link govuk-link clicker colordarkblue fontfamilyArial fontsize12rem",
-                        # "//a[starts-with(@aria-label, '{user_paon}')]",
-                    )
-                )
-            )
-            property.click()
-
-            upcoming_scheduled_collections = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located(
-                    (By.ID, "upcoming-scheduled-collections")
-                )
-            )
-
-            soup = BeautifulSoup(driver.page_source, features="html.parser")
-
-            collections = []
-            for collection in soup.find_all(
-                "u1",
-                class_="displayinlineblock justifycontentleft alignitemscenter margin0 padding0",
-            ):
-                date = collection.find(
-                    "p", string=lambda text: text and "/" in text
-                ).text.strip()  # Extract date
-                service = collection.find(
-                    "p", string=lambda text: text and "Collection Service" in text
-                ).text.strip()  # Extract service type
-                collections.append({"date": date, "service": service})
-
-            # Print the parsed data
-            for item in collections:
-
-                dict_data = {
-                    "type": item["service"],
-                    "collectionDate": item["date"],
-                }
-                bindata["bins"].append(dict_data)
-
-        except Exception as e:
-            # Here you can log the exception if needed
-            print(f"An error occurred: {e}")
-            # Optionally, re-raise the exception if you want it to propagate
-            raise
-        finally:
-            # This block ensures that the driver is closed regardless of an exception
-            if driver:
-                driver.quit()
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        # Make API request
+        api_url = f"https://east-herts.co.uk/api/services/{user_uprn}"
+        response = requests.get(api_url)
+        response.raise_for_status()
+
+        data = response.json()
+        today = datetime.now().date()
+
+        for service in data.get("services", []):
+            collection_date_str = service.get("collectionDate")
+            if collection_date_str:
+                collection_date = datetime.strptime(collection_date_str, "%Y-%m-%d").date()
+                # Only include future dates
+                if collection_date >= today:
+                    dict_data = {
+                        "type": service.get("binType", ""),
+                        "collectionDate": collection_date.strftime("%d/%m/%Y"),
+                    }
+                    bindata["bins"].append(dict_data)
+
         return bindata
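The rewritten parse_data is now a plain transformation: keep only services whose collectionDate is today or later, and re-emit the date as %d/%m/%Y. A self-contained sketch of that step, using a made-up payload in the shape the parser assumes:

from datetime import datetime

# Hypothetical payload in the shape the new parse_data expects.
data = {
    "services": [
        {"binType": "Refuse", "collectionDate": "2030-01-06"},
        {"binType": "Recycling", "collectionDate": "2020-01-07"},  # in the past, filtered out
    ]
}

today = datetime.now().date()
bindata = {"bins": []}

for service in data.get("services", []):
    collection_date_str = service.get("collectionDate")
    if not collection_date_str:
        continue
    collection_date = datetime.strptime(collection_date_str, "%Y-%m-%d").date()
    if collection_date >= today:  # keep only today or future collections
        bindata["bins"].append(
            {
                "type": service.get("binType", ""),
                "collectionDate": collection_date.strftime("%d/%m/%Y"),
            }
        )

print(bindata)  # {'bins': [{'type': 'Refuse', 'collectionDate': '06/01/2030'}]}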

uk_bin_collection/uk_bin_collection/councils/EastRenfrewshireCouncil.py

Lines changed: 61 additions & 56 deletions
@@ -2,12 +2,12 @@
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.wait import WebDriverWait
+from selenium.webdriver.support.ui import Select

 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


-# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -21,97 +21,102 @@ def parse_data(self, page: str, **kwargs) -> dict:
             data = {"bins": []}
             user_paon = kwargs.get("paon")
             user_postcode = kwargs.get("postcode")
+            user_uprn = kwargs.get("uprn")
             web_driver = kwargs.get("web_driver")
             headless = kwargs.get("headless")
-            check_paon(user_paon)
             check_postcode(user_postcode)

             # Create Selenium webdriver
             driver = create_webdriver(web_driver, headless, None, __name__)
-            driver.get(
-                "https://eastrenfrewshire.gov.uk/article/1145/Bin-collection-days"
-            )
+            driver.get("https://eastrenfrewshire.gov.uk/bin-days")

             # Wait for the postcode field to appear then populate it
             inputElement_postcode = WebDriverWait(driver, 30).until(
                 EC.presence_of_element_located(
-                    (By.ID, "RESIDUALWASTEV2_PAGE1_POSTCODE")
+                    (By.CSS_SELECTOR, "input[autocomplete='postal-code']")
                 )
             )
             inputElement_postcode.send_keys(user_postcode)

             # Click search button
-            findAddress = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located(
-                    (By.ID, "RESIDUALWASTEV2_PAGE1_FIELD199_NEXT")
-                )
-            )
-            findAddress.click()
-
-            # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
-            WebDriverWait(driver, 10).until(
+            search_button = WebDriverWait(driver, 10).until(
                 EC.element_to_be_clickable(
-                    (
-                        By.XPATH,
-                        "//select[@id='RESIDUALWASTEV2_PAGE2_UPRN']//option[contains(., '"
-                        + user_paon
-                        + "')]",
-                    )
+                    (By.XPATH, "//button[text()='Search']")
                 )
-            ).click()
+            )
+            search_button.click()

-            # Click search button
-            findDates = WebDriverWait(driver, 10).until(
+            # Wait for the addresses dropdown to appear
+            addresses_select = WebDriverWait(driver, 10).until(
                 EC.presence_of_element_located(
-                    (By.ID, "RESIDUALWASTEV2_PAGE2_FIELD206_NEXT")
+                    (By.XPATH, "//label[text()='Addresses']/following-sibling::select")
                 )
             )
-            findDates.click()
+
+            # Select the appropriate address based on UPRN or house number
+            select = Select(addresses_select)
+            if user_uprn:
+                # Select by UPRN value
+                select.select_by_value(user_uprn)
+            elif user_paon:
+                # Select by house number/name in the text
+                for option in select.options:
+                    if user_paon in option.text:
+                        select.select_by_visible_text(option.text)
+                        break
+            else:
+                # Select the first non-default option
+                select.select_by_index(1)
+
+            # Click the "Find my collection dates" button
+            find_dates_button = WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable(
+                    (By.XPATH, "//button[text()='Find my collection dates']")
+                )
+            )
+            find_dates_button.click()

-            # Wait for the collections table to appear
+            # Wait for the results table to appear
             WebDriverWait(driver, 10).until(
                 EC.presence_of_element_located(
-                    (By.ID, "RESIDUALWASTEV2_COLLECTIONDATES_DISPLAYBINCOLLECTIONINFO")
+                    (By.XPATH, "//th[text()='Bin Type']")
                 )
             )

             soup = BeautifulSoup(driver.page_source, features="html.parser")
-            soup.prettify()
-
-            # Get collections div
-            next_collection_div = soup.find("div", {"id": "yourNextCollection"})
-
-            # Get next collection date
-            next_collection_date = datetime.strptime(
-                next_collection_div.find("span", {"class": "dueDate"})
-                .get_text()
-                .strip(),
-                "%d/%m/%Y",
-            )
-
-            # Get next collection bins
-            next_collection_bin = next_collection_div.findAll(
-                "span", {"class": "binColour"}
-            )
-
-            # Format results
-            for row in next_collection_bin:
-                dict_data = {
-                    "type": row.get_text().strip(),
-                    "collectionDate": next_collection_date.strftime("%d/%m/%Y"),
-                }
-                data["bins"].append(dict_data)
+
+            # Find the table with bin collection data
+            table = soup.find("th", string="Bin Type").find_parent("table")
+            rows = table.find_all("tr")[1:]  # Skip header row
+
+            for row in rows:
+                cells = row.find_all("td")
+                if len(cells) >= 3:
+                    date_cell = cells[0].get_text().strip()
+                    bin_type_cell = cells[2]
+
+                    # Only process rows that have a date
+                    if date_cell:
+                        # Get all text content including line breaks
+                        bin_type_text = bin_type_cell.get_text(separator='\n').strip()
+
+                        # Split multiple bin types that appear on separate lines
+                        bin_types = [bt.strip() for bt in bin_type_text.split('\n') if bt.strip()]
+
+                        for bin_type in bin_types:
+                            dict_data = {
+                                "type": bin_type,
+                                "collectionDate": date_cell,
+                            }
+                            data["bins"].append(dict_data)

             data["bins"].sort(
                 key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
             )
         except Exception as e:
-            # Here you can log the exception if needed
             print(f"An error occurred: {e}")
-            # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
-            # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
         return data
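The East Renfrewshire flow now drives the new bin-days form with selenium's Select helper: prefer an exact UPRN match on the option value, fall back to a house-number substring match, and otherwise take the first real option. A sketch of that selection logic in isolation, assuming the addresses dropdown WebElement has already been located; the helper name is illustrative, not part of the codebase:

from typing import Optional

from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support.ui import Select


def choose_address(dropdown: WebElement, uprn: Optional[str] = None, paon: Optional[str] = None) -> None:
    """Pick an address option the same way the updated scraper does."""
    select = Select(dropdown)
    if uprn:
        # Option values are assumed to carry the UPRN, as the scraper relies on.
        select.select_by_value(uprn)
    elif paon:
        # Fall back to matching the house number/name in the visible option text.
        for option in select.options:
            if paon in option.text:
                select.select_by_visible_text(option.text)
                return
    else:
        # Skip the placeholder option and take the first real address.
        select.select_by_index(1)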

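The new results-table parsing also has to cope with several bin types stacked in a single cell, which is why it pulls the cell text with a newline separator and splits it. A standalone sketch against a made-up fragment of the results table markup (the real page structure is an assumption inferred from the selectors above):

from bs4 import BeautifulSoup

# Hypothetical snippet mimicking the structure the scraper expects:
# date in the first cell, bin types (possibly several) in the third.
html = """
<table>
  <tr><th>Date</th><th>Day</th><th>Bin Type</th></tr>
  <tr><td>06/01/2030</td><td>Monday</td><td>Grey<br>Brown</td></tr>
</table>
"""

soup = BeautifulSoup(html, features="html.parser")
table = soup.find("th", string="Bin Type").find_parent("table")

bins = []
for row in table.find_all("tr")[1:]:  # skip the header row
    cells = row.find_all("td")
    if len(cells) >= 3 and cells[0].get_text().strip():
        # get_text(separator="\n") turns <br> breaks into newlines so each type can be split out.
        for bin_type in cells[2].get_text(separator="\n").split("\n"):
            if bin_type.strip():
                bins.append(
                    {"type": bin_type.strip(), "collectionDate": cells[0].get_text().strip()}
                )

print(bins)  # [{'type': 'Grey', ...}, {'type': 'Brown', ...}]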