Skip to content

Commit 8060dc4

Browse files
authored
Merge pull request #1545 from robbrad/july_25_fixes
fix: multiple broken councils
2 parents f2cf24b + d14fdc0 commit 8060dc4

19 files changed

+1208
-895
lines changed

uk_bin_collection/tests/input.json

Lines changed: 11 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -100,16 +100,11 @@
100100
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN.",
101101
"LAD24CD": "E07000105"
102102
},
103-
"AylesburyValeCouncil": {
104-
"skip_get_url": true,
105-
"uprn": "766252532",
106-
"url": "http://avdcbins.web-labs.co.uk/RefuseApi.asmx",
107-
"wiki_name": "Buckinghamshire",
108-
"wiki_note": "To get the UPRN, please use [FindMyAddress](https://www.findmyaddress.co.uk/search). Returns all published collections in the past, present, future.",
109-
"LAD24CD": "E06000060"
110-
},
111103
"BCPCouncil": {
112104
"LAD24CD": "E06000058",
105+
"house_number": "3 HARBOUR VIEW ROAD, POOLE, BH14 0PD",
106+
"postcode": "BH14 0PD",
107+
"web_driver": "http://selenium:4444",
113108
"skip_get_url": true,
114109
"uprn": "100040810214",
115110
"url": "https://online.bcpcouncil.gov.uk/bindaylookup/",
@@ -137,8 +132,8 @@
137132
"LAD24CD": "E09000002"
138133
},
139134
"BarnetCouncil": {
140-
"house_number": "HA8 7NA, 2, MANOR PARK GARDENS, EDGWARE, BARNET",
141-
"postcode": "HA8 7NA",
135+
"house_number": "26A",
136+
"postcode": "EN4 8TB",
142137
"skip_get_url": true,
143138
"url": "https://www.barnet.gov.uk/recycling-and-waste/bin-collections/find-your-bin-collection-day",
144139
"web_driver": "http://selenium:4444",
@@ -158,7 +153,7 @@
158153
"BasildonCouncil": {
159154
"skip_get_url": true,
160155
"uprn": "10013350430",
161-
"url": "https://basildonportal.azurewebsites.net/api/getPropertyRefuseInformation",
156+
"url": "https://mybasildon.powerappsportals.com/check/where_i_live/",
162157
"wiki_name": "Basildon",
163158
"wiki_note": "To get the UPRN, you will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search).",
164159
"LAD24CD": "E07000066"
@@ -366,6 +361,7 @@
366361
"postcode": "EN8 7FL",
367362
"uprn": "148048608",
368363
"url": "https://www.broxbourne.gov.uk",
364+
"web_driver": "http://selenium:4444",
369365
"wiki_name": "Broxbourne",
370366
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN.",
371367
"LAD24CD": "E07000095"
@@ -381,10 +377,10 @@
381377
"LAD24CD": "E07000172"
382378
},
383379
"BuckinghamshireCouncil": {
384-
"house_number": "2",
380+
"house_number": "The Ridings, Magpie Lane, Loudwater, High Wycombe, HP13 7BA",
385381
"postcode": "HP13 7BA",
386-
"skip_get_url": true,
387-
"url": "https://iapp.itouchvision.com/iappcollectionday/collection-day/?uuid=FA353FC74600CBE61BE409534D00A8EC09BDA3AC&lang=en",
382+
"uprn": "100081093078",
383+
"url": "https://www.buckinghamshire.gov.uk/waste-and-recycling/find-out-when-its-your-bin-collection/",
388384
"web_driver": "http://selenium:4444",
389385
"wiki_name": "Buckinghamshire",
390386
"wiki_note": "Pass the house name/number and postcode in their respective arguments, both wrapped in quotes.",
@@ -584,7 +580,7 @@
584580
"LAD24CD": "E06000052"
585581
},
586582
"CotswoldDistrictCouncil": {
587-
"house_number": "19",
583+
"house_number": "19 SUMMERS WAY, MORETON-IN-MARSH, GL56 0GB",
588584
"postcode": "GL56 0GB",
589585
"skip_get_url": true,
590586
"url": "https://community.cotswold.gov.uk/s/waste-collection-enquiry",

uk_bin_collection/uk_bin_collection/councils/AngusCouncil.py

Lines changed: 69 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
import time
21
import re
32
from datetime import datetime
43

@@ -7,6 +6,7 @@
76
from selenium.webdriver.common.keys import Keys
87
from selenium.webdriver.support import expected_conditions as EC
98
from selenium.webdriver.support.ui import Select, WebDriverWait
9+
from selenium.common.exceptions import TimeoutException, NoSuchElementException
1010

1111
from uk_bin_collection.uk_bin_collection.common import *
1212
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
@@ -27,56 +27,95 @@ def parse_data(self, page: str, **kwargs) -> dict:
2727
headless = kwargs.get("headless")
2828
web_driver = kwargs.get("web_driver")
2929
driver = create_webdriver(web_driver, headless, None, __name__)
30-
page = "https://www.angus.gov.uk/bins_litter_and_recycling/bin_collection_days"
31-
32-
driver.get(page)
30+
31+
driver.get("https://www.angus.gov.uk/bins_litter_and_recycling/bin_collection_days")
3332

34-
wait = WebDriverWait(driver, 10)
35-
accept_cookies_button = wait.until(
36-
EC.element_to_be_clickable((By.ID, "ccc-recommended-settings"))
37-
)
38-
accept_cookies_button.click()
33+
wait = WebDriverWait(driver, 20)
34+
35+
# Accept cookies if present
36+
try:
37+
accept_cookies_button = wait.until(
38+
EC.element_to_be_clickable((By.ID, "ccc-recommended-settings"))
39+
)
40+
accept_cookies_button.click()
41+
except TimeoutException:
42+
print("Cookie banner not found, continuing...")
3943

44+
# Click on "Find bin collection days" link
4045
find_your_collection_button = wait.until(
4146
EC.element_to_be_clickable(
42-
(By.XPATH, "/html/body/div[2]/div[2]/div/div/section/div[2]/div/article/div/div/p[2]/a")
47+
(By.XPATH, "//a[contains(text(), 'Find bin collection days') or contains(@href, 'collection')]")
4348
)
4449
)
4550
find_your_collection_button.click()
4651

52+
# Wait for iframe to be present and switch to it
4753
iframe = wait.until(EC.presence_of_element_located((By.ID, "fillform-frame-1")))
4854
driver.switch_to.frame(iframe)
4955

50-
postcode_input = wait.until(EC.presence_of_element_located((By.ID, "searchString")))
51-
postcode_input.send_keys(user_postcode + Keys.TAB + Keys.ENTER)
52-
53-
time.sleep(15)
54-
55-
select_elem = wait.until(EC.presence_of_element_located((By.ID, "customerAddress")))
56-
WebDriverWait(driver, 10).until(
57-
lambda d: len(select_elem.find_elements(By.TAG_NAME, "option")) > 1
58-
)
59-
dropdown = Select(select_elem)
56+
# Handle banner/modal if present
57+
try:
58+
close_button = wait.until(EC.element_to_be_clickable((By.TAG_NAME, "button")))
59+
if close_button.text.strip().lower() in ['close', 'dismiss', 'ok']:
60+
close_button.click()
61+
except TimeoutException:
62+
pass
63+
64+
# Wait for postcode input to be clickable
65+
postcode_input = wait.until(EC.element_to_be_clickable((By.ID, "searchString")))
66+
postcode_input.clear()
67+
postcode_input.send_keys(user_postcode)
68+
69+
# Find and click the search button
70+
try:
71+
submit_btn = driver.find_element(By.XPATH, "//button[contains(text(), 'Search')]")
72+
submit_btn.click()
73+
except:
74+
try:
75+
submit_btn = driver.find_element(By.XPATH, "//input[@type='submit']")
76+
submit_btn.click()
77+
except:
78+
postcode_input.send_keys(Keys.TAB)
79+
postcode_input.send_keys(Keys.ENTER)
80+
81+
# Wait for address dropdown to be present
82+
address_dropdown = wait.until(EC.presence_of_element_located((By.ID, "customerAddress")))
83+
84+
# Wait for dropdown options to populate with extended timeout
85+
try:
86+
WebDriverWait(driver, 30).until(
87+
lambda d: len(d.find_element(By.ID, "customerAddress").find_elements(By.TAG_NAME, "option")) > 1
88+
)
89+
except TimeoutException:
90+
options = address_dropdown.find_elements(By.TAG_NAME, "option")
91+
raise ValueError(f"Dropdown only has {len(options)} options after 30s wait")
92+
93+
# Select the UPRN from dropdown
94+
dropdown = Select(address_dropdown)
6095
dropdown.select_by_value(user_uprn)
6196

62-
time.sleep(10)
63-
97+
# Wait for results to appear
6498
wait.until(
6599
EC.presence_of_element_located(
66-
(By.CSS_SELECTOR, "span.fieldInput.content.html.non-input"))
100+
(By.CSS_SELECTOR, "span.fieldInput.content.html.non-input")
101+
)
67102
)
103+
104+
# Wait additional time for JavaScript to populate the data
105+
import time
106+
time.sleep(15) # Wait 15 seconds for dynamic content to load
68107

108+
# Parse the results
69109
soup = BeautifulSoup(driver.page_source, "html.parser")
70110
bin_data = {"bins": []}
71111
current_date = datetime.now()
72112
current_formatted_date = None
73113

74114
spans = soup.select("span.fieldInput.content.html.non-input")
75-
print(f"Found {len(spans)} bin info spans.")
76115

77116
for i, span in enumerate(spans):
78117
try:
79-
# Look for any non-empty <u> tag recursively
118+
# Look for date in <u> tags
80119
date_tag = next(
81120
(u for u in span.find_all("u") if u and u.text.strip()),
82121
None
@@ -93,22 +132,15 @@ def parse_data(self, page: str, **kwargs) -> dict:
93132
if parsed_date.date() < current_date.date():
94133
parsed_date = parsed_date.replace(year=current_date.year + 1)
95134
current_formatted_date = parsed_date.strftime("%d/%m/%Y")
96-
print(f"[{i}] Parsed date: {current_formatted_date}")
97-
except ValueError as ve:
98-
print(f"[{i}] Could not parse date: '{full_date_str}' - {ve}")
135+
except ValueError:
99136
continue
100-
else:
101-
print(f"[{i}] No date tag found, using last valid date: {current_formatted_date}")
102-
103-
if not current_formatted_date:
104-
print(f"[{i}] No current date to associate bin type with — skipping.")
105-
continue
106137

107-
if not bin_type_tag or not bin_type_tag.text.strip():
108-
print(f"[{i}] No bin type found — skipping.")
138+
if not current_formatted_date or not bin_type_tag:
109139
continue
110140

111141
bin_type = bin_type_tag.text.strip()
142+
if not bin_type:
143+
continue
112144

113145
# Optional seasonal override
114146
try:
@@ -118,25 +150,16 @@ def parse_data(self, page: str, **kwargs) -> dict:
118150
except Exception:
119151
pass
120152

121-
print(f"[{i}] Found bin: {bin_type} on {current_formatted_date}")
122-
123153
bin_data["bins"].append({
124154
"type": bin_type,
125155
"collectionDate": current_formatted_date
126156
})
127157

128-
except Exception as inner_e:
129-
print(f"[{i}] Skipping span due to error: {inner_e}")
130-
continue
131-
132-
except Exception as inner_e:
133-
print(f"Skipping span due to error: {inner_e}")
158+
except Exception:
134159
continue
135160

136161
if not bin_data["bins"]:
137162
raise ValueError("No bin data found.")
138-
139-
print(bin_data)
140163

141164
return bin_data
142165

uk_bin_collection/uk_bin_collection/councils/AylesburyValeCouncil.py

Lines changed: 0 additions & 69 deletions
This file was deleted.

0 commit comments

Comments
 (0)