
Commit 3190cd5

Merge pull request #1552 from robbrad/july_25_fixes
July Part 2: Multiple council fixes
2 parents 2049191 + 0d37443 commit 3190cd5

File tree

8 files changed: +327 -264 lines changed


uk_bin_collection/tests/input.json

Lines changed: 3 additions & 5 deletions
@@ -761,13 +761,11 @@
     },
     "EastHertsCouncil": {
         "LAD24CD": "E07000097",
-        "house_number": "1",
-        "postcode": "CM20 2FZ",
         "skip_get_url": true,
-        "url": "https://www.eastherts.gov.uk",
-        "web_driver": "http://selenium:4444",
+        "uprn": "10023088183",
+        "url": "https://east-herts.co.uk/api/services/",
         "wiki_name": "East Herts Council",
-        "wiki_note": "Pass the house number and postcode in their respective parameters."
+        "wiki_note": "Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search)."
     },
     "EastLindseyDistrictCouncil": {
         "house_number": "1",

uk_bin_collection/uk_bin_collection/councils/BroxbourneCouncil.py

Lines changed: 16 additions & 1 deletion
@@ -22,10 +22,25 @@ def parse_data(self, page: str, **kwargs) -> dict:
         check_postcode(user_postcode)
 
         bindata = {"bins": []}
-        driver = create_webdriver(web_driver, headless, None, __name__)
+        # Use a realistic user agent to help bypass Cloudflare
+        user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+        driver = create_webdriver(web_driver, headless, user_agent, __name__)
 
         try:
             driver.get("https://www.broxbourne.gov.uk/bin-collection-date")
+
+            # Wait for Cloudflare challenge to complete
+            print("Waiting for page to load (Cloudflare check)...")
+            try:
+                WebDriverWait(driver, 45).until(
+                    lambda d: "Just a moment" not in d.title and d.title != "" and len(d.find_elements(By.TAG_NAME, "input")) > 0
+                )
+                print(f"Page loaded: {driver.title}")
+            except Exception:
+                print(f"Timeout waiting for page load. Current title: {driver.title}")
+                # Try to continue anyway
+                pass
+
             time.sleep(8)
 
             # Handle cookie banner with multiple attempts
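
The added wait treats three signals as "Cloudflare finished": the interstitial title ("Just a moment...") is gone, the title is non-empty, and at least one form input has rendered. A standalone sketch of the same pattern, factored into a hypothetical wait_for_cloudflare helper that is not part of this codebase:

    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait


    def wait_for_cloudflare(driver, timeout=45):
        """Return True once the Cloudflare interstitial has been replaced
        by the real page, False if the timeout expires first."""
        try:
            WebDriverWait(driver, timeout).until(
                lambda d: "Just a moment" not in d.title
                and d.title != ""
                and len(d.find_elements(By.TAG_NAME, "input")) > 0
            )
            return True
        except TimeoutException:
            # Caller decides whether to proceed with a partial page
            return False

Returning a boolean rather than swallowing the exception inline keeps the scraper's control flow explicit: the caller can log, retry, or continue with whatever rendered.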
uk_bin_collection/uk_bin_collection/councils/EastHertsCouncil.py

Lines changed: 27 additions & 116 deletions
@@ -1,7 +1,6 @@
-from bs4 import BeautifulSoup
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.support.wait import WebDriverWait
+import json
+import requests
+from datetime import datetime
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
@@ -15,116 +14,28 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
-        # Get and check UPRN
-        driver = None
-        try:
-            user_postcode = kwargs.get("postcode")
-            user_paon = kwargs.get("paon")
-            check_paon(user_paon)
-            check_postcode(user_postcode)
-            web_driver = kwargs.get("web_driver")
-            headless = kwargs.get("headless")
-            bindata = {"bins": []}
-
-            API_URL = "https://uhte-wrp.whitespacews.com"
-
-            # Create Selenium webdriver
-            driver = create_webdriver(web_driver, headless, None, __name__)
-            driver.get(API_URL)
-
-            # Click Find my bin collection day button
-            collectionButton = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable((By.LINK_TEXT, "Find my bin collection day"))
-            )
-            collectionButton.click()
-
-            main_content = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located((By.ID, "main-content"))
-            )
-
-            # Wait for the property number field to appear then populate it
-            inputElement_number = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "address_name_number",
-                    )
-                )
-            )
-            inputElement_number.send_keys(user_paon)
-
-            # Wait for the postcode field to appear then populate it
-            inputElement_postcode = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "address_postcode",
-                    )
-                )
-            )
-            inputElement_postcode.send_keys(user_postcode)
-
-            # Click search button
-            continueButton = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "Submit",
-                    )
-                )
-            )
-            continueButton.click()
-
-            # Wait for the 'Search Results' to appear and select the first result
-            property = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.CSS_SELECTOR,
-                        "li.app-subnav__section-item a",
-                        # "app-subnav__link govuk-link clicker colordarkblue fontfamilyArial fontsize12rem",
-                        # "//a[starts-with(@aria-label, '{user_paon}')]",
-                    )
-                )
-            )
-            property.click()
-
-            upcoming_scheduled_collections = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located(
-                    (By.ID, "upcoming-scheduled-collections")
-                )
-            )
-
-            soup = BeautifulSoup(driver.page_source, features="html.parser")
-
-            collections = []
-            for collection in soup.find_all(
-                "u1",
-                class_="displayinlineblock justifycontentleft alignitemscenter margin0 padding0",
-            ):
-                date = collection.find(
-                    "p", string=lambda text: text and "/" in text
-                ).text.strip()  # Extract date
-                service = collection.find(
-                    "p", string=lambda text: text and "Collection Service" in text
-                ).text.strip()  # Extract service type
-                collections.append({"date": date, "service": service})
-
-            # Print the parsed data
-            for item in collections:
-
-                dict_data = {
-                    "type": item["service"],
-                    "collectionDate": item["date"],
-                }
-                bindata["bins"].append(dict_data)
-
-        except Exception as e:
-            # Here you can log the exception if needed
-            print(f"An error occurred: {e}")
-            # Optionally, re-raise the exception if you want it to propagate
-            raise
-        finally:
-            # This block ensures that the driver is closed regardless of an exception
-            if driver:
-                driver.quit()
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+
+        # Make API request
+        api_url = f"https://east-herts.co.uk/api/services/{user_uprn}"
+        response = requests.get(api_url)
+        response.raise_for_status()
+
+        data = response.json()
+        today = datetime.now().date()
+
+        for service in data.get("services", []):
+            collection_date_str = service.get("collectionDate")
+            if collection_date_str:
+                collection_date = datetime.strptime(collection_date_str, "%Y-%m-%d").date()
+                # Only include future dates
+                if collection_date >= today:
+                    dict_data = {
+                        "type": service.get("binType", ""),
+                        "collectionDate": collection_date.strftime("%d/%m/%Y"),
+                    }
+                    bindata["bins"].append(dict_data)
+
         return bindata
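
The rewrite replaces the whole Selenium journey with one GET against the council API, and it assumes the endpoint returns JSON of the form {"services": [{"binType": ..., "collectionDate": "YYYY-MM-DD"}, ...]}. A quick standalone sketch for probing the endpoint outside the parser, using the example UPRN from input.json (the response shape is inferred from the parser above, not from published API documentation):

    from datetime import datetime

    import requests

    uprn = "10023088183"  # example UPRN from input.json
    response = requests.get(f"https://east-herts.co.uk/api/services/{uprn}", timeout=10)
    response.raise_for_status()

    for service in response.json().get("services", []):
        raw = service.get("collectionDate")
        if raw:
            when = datetime.strptime(raw, "%Y-%m-%d").date()
            print(f'{service.get("binType", "?"):<20} {when:%d/%m/%Y}')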

uk_bin_collection/uk_bin_collection/councils/EastLothianCouncil.py

Lines changed: 27 additions & 39 deletions
@@ -5,7 +5,6 @@
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
-# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -14,70 +13,59 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
-
         user_postcode = kwargs.get("postcode")
         user_paon = kwargs.get("paon")
         check_postcode(user_postcode)
         check_paon(user_paon)
         bindata = {"bins": []}
 
-        URI = "http://collectiondates.eastlothian.gov.uk/ajax/your-calendar/load-streets-spring-2024.asp"
-
-        payload = {
-            "postcode": user_postcode,
-        }
-
+        # Get address ID from the streets endpoint
+        streets_uri = "https://collectiondates.eastlothian.gov.uk/ajax/your-calendar/load-streets-summer-2025.asp"
         headers = {
-            "Referer": "http://collectiondates.eastlothian.gov.uk/your-calendar",
+            "Referer": "https://collectiondates.eastlothian.gov.uk/your-calendar",
             "User-Agent": "Mozilla/5.0",
         }
-
-        # Make the GET request
-        response = requests.get(URI, headers=headers, params=payload)
-
-        # Parse the HTML with BeautifulSoup
+
+        response = requests.get(streets_uri, params={"postcode": user_postcode}, headers=headers)
         soup = BeautifulSoup(response.text, "html.parser")
-
-        # Find the select dropdown
+
         select = soup.find("select", id="SelectStreet")
-
-        # Find the option that contains "Flat 1"
+        if not select:
+            raise ValueError(f"No streets found for postcode {user_postcode}")
+
         address = select.find("option", string=lambda text: text and user_paon in text)
-
-        URI = "http://collectiondates.eastlothian.gov.uk/ajax/your-calendar/load-recycling-summer-2024.asp"
-
-        payload = {
-            "id": address["value"],
-        }
-
-        # Make the GET request
-        response = requests.get(URI, headers=headers, params=payload)
-
-        # Parse the HTML with BeautifulSoup
+        if not address:
+            raise ValueError(f"Address '{user_paon}' not found for postcode {user_postcode}")
+
+        address_id = address["value"]
+
+        # Get collection data using the correct endpoint
+        collections_uri = "https://collectiondates.eastlothian.gov.uk/ajax/your-calendar/load-recycling-summer-2025.asp"
+        response = requests.get(collections_uri, params={"id": address_id}, headers=headers)
+
         soup = BeautifulSoup(response.text, "html.parser")
-
+
         # Extract collection details
         calendar_items = soup.find_all("div", class_="calendar-item")
         for item in calendar_items:
             waste_label = item.find("div", class_="waste-label").text.strip()
             waste_value = item.find("div", class_="waste-value").find("h4").text.strip()
-
+
             try:
                 collection_date = datetime.strptime(
                     remove_ordinal_indicator_from_date_string(waste_value),
                     "%A %d %B %Y",
                 )
+
+                bindata["bins"].append({
+                    "type": waste_label.replace(" is:", ""),
+                    "collectionDate": collection_date.strftime(date_format),
+                })
             except ValueError:
                 continue
-
-            dict_data = {
-                "type": waste_label.replace(" is:", ""),
-                "collectionDate": collection_date.strftime(date_format),
-            }
-            bindata["bins"].append(dict_data)
-
+
         bindata["bins"].sort(
             key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
         )
-
+
         return bindata
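
The rewrite keeps the same two-step AJAX flow (postcode → street list → address id → calendar HTML) but switches to HTTPS and the summer-2025 endpoints, fails loudly when the postcode or address matches nothing, and moves the append inside the try block so only successfully parsed dates are recorded. A standalone sketch of that flow with the two requests split into hypothetical helpers; the season-specific .asp filenames are copied from the diff and will presumably roll over again next season:

    import requests
    from bs4 import BeautifulSoup

    BASE = "https://collectiondates.eastlothian.gov.uk/ajax/your-calendar"
    HEADERS = {
        "Referer": "https://collectiondates.eastlothian.gov.uk/your-calendar",
        "User-Agent": "Mozilla/5.0",
    }


    def find_address_id(postcode: str, paon: str) -> str:
        # Step 1: postcode -> <select id="SelectStreet"> of address <option>s
        html = requests.get(
            f"{BASE}/load-streets-summer-2025.asp",
            params={"postcode": postcode},
            headers=HEADERS,
        ).text
        select = BeautifulSoup(html, "html.parser").find("select", id="SelectStreet")
        option = select.find("option", string=lambda t: t and paon in t)
        return option["value"]  # raises TypeError if nothing matched


    def fetch_calendar_html(address_id: str) -> str:
        # Step 2: address id -> HTML fragment of div.calendar-item entries
        return requests.get(
            f"{BASE}/load-recycling-summer-2025.asp",
            params={"id": address_id},
            headers=HEADERS,
        ).text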
