Skip to content

Commit c6dbb36

Browse files
committed
fix: Fixing multiple broken councils
1 parent 6a2b5ba commit c6dbb36

File tree

7 files changed

+478
-380
lines changed

7 files changed

+478
-380
lines changed

uk_bin_collection/tests/input.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@
102102
},
103103
"BCPCouncil": {
104104
"LAD24CD": "E06000058",
105+
"house_number": "3 HARBOUR VIEW ROAD, POOLE, BH14 0PD",
106+
"postcode": "BH14 0PD",
107+
"web_driver": "http://selenium:4444",
105108
"skip_get_url": true,
106109
"uprn": "100040810214",
107110
"url": "https://online.bcpcouncil.gov.uk/bindaylookup/",
@@ -377,8 +380,7 @@
377380
"house_number": "The Ridings, Magpie Lane, Loudwater, High Wycombe, HP13 7BA",
378381
"postcode": "HP13 7BA",
379382
"uprn": "100081093078",
380-
"skip_get_url": true,
381-
"url": "https://iapp.itouchvision.com/iappcollectionday/collection-day/?uuid=FA353FC74600CBE61BE409534D00A8EC09BDA3AC&lang=en",
383+
"url": "https://www.buckinghamshire.gov.uk/waste-and-recycling/find-out-when-its-your-bin-collection/",
382384
"web_driver": "http://selenium:4444",
383385
"wiki_name": "Buckinghamshire",
384386
"wiki_note": "Pass the house name/number and postcode in their respective arguments, both wrapped in quotes.",
Lines changed: 119 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
import json
2-
from datetime import timedelta
3-
4-
import requests
2+
import time
3+
from datetime import datetime
54
from bs4 import BeautifulSoup
5+
from selenium.webdriver.common.by import By
6+
from selenium.webdriver.support.ui import WebDriverWait, Select
7+
from selenium.webdriver.support import expected_conditions as EC
8+
from selenium.webdriver.common.keys import Keys
69
from uk_bin_collection.uk_bin_collection.common import *
710
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
811

912

10-
# import the wonderful Beautiful Soup and the URL grabber
1113
class CouncilClass(AbstractGetBinDataClass):
1214
"""
1315
Concrete classes have to implement all abstract operations of the
@@ -16,36 +18,116 @@ class CouncilClass(AbstractGetBinDataClass):
1618
"""
1719

1820
def parse_data(self, page: str, **kwargs) -> dict:
    """
    Scrape the BCP Council "check your bin collection" page for one address.

    Drives a Selenium browser through the postcode search, picks the
    dropdown entry that matches the supplied house name/number, then reads
    the collections table out of the rendered page.

    Args:
        page: Unused here; the page is loaded through Selenium instead.
        **kwargs: Reads ``postcode``, ``paon`` (house name/number or address
            text used to choose the dropdown entry), ``web_driver`` and
            ``headless`` (defaults to ``True`` when absent).

    Returns:
        ``{"bins": [...]}`` where each entry has a ``type`` (first table
        column) and a ``collectionDate`` formatted with ``date_format``.
        The list is empty when no collections table is found in the page.

    Raises:
        selenium.common.exceptions.TimeoutException: if the postcode box,
            search button, address dropdown, a matching address or the
            results table does not appear within its wait.
    """
    # Imported at function scope so this method does not depend on a
    # module-level import that may not be present.
    from selenium.common.exceptions import (
        StaleElementReferenceException,
        WebDriverException,
    )

    # Formats tried in order; the first one that parses wins.
    date_formats = (
        "%A, %d %B %Y",
        "%A %d %B %Y",
        "%d/%m/%Y",
        "%d-%m-%Y",
        "%Y-%m-%d",
    )

    def normalise(text):
        """Upper-case, drop commas and collapse whitespace for lenient matching."""
        return " ".join(str(text).replace(",", " ").upper().split())

    def parse_collection_date(text):
        """Return a datetime for *text* using the first matching format, else None."""
        for fmt in date_formats:
            try:
                return datetime.strptime(text, fmt)
            except ValueError:
                continue
        return None

    postcode = kwargs.get("postcode")
    house_number = kwargs.get("paon")
    web_driver = kwargs.get("web_driver")
    headless = kwargs.get("headless", True)

    check_postcode(postcode)
    check_paon(house_number)

    driver = create_webdriver(web_driver, headless=headless)

    try:
        driver.get("https://bcpportal.bcpcouncil.gov.uk/checkyourbincollection/")

        # The cookie banner is optional: dismiss it when it shows up, carry
        # on when it does not. Only Selenium errors are treated as "no
        # banner" so that interrupts and programming errors still surface.
        try:
            cookie_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable(
                    (By.XPATH, "//button[contains(text(), 'Okay')]")
                )
            )
            cookie_button.click()
        except WebDriverException:
            pass

        # Enter the postcode into the first text input on the page.
        postcode_input = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "input[type='text']"))
        )
        postcode_input.clear()
        postcode_input.send_keys(postcode)

        search_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, "searchAddress"))
        )
        search_button.click()

        # Wait for the address dropdown to be rendered.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.TAG_NAME, "select"))
        )

        # Choose the address that matches the caller's house name/number.
        # Matching is done in Python rather than by interpolating the value
        # into an XPath string, so apostrophes in an address cannot break
        # the query. The first option containing the text wins.
        # NOTE(review): assumes the dropdown text contains the supplied
        # value once case, commas and spacing are ignored - confirm against
        # the live page.
        wanted = normalise(house_number)

        def matching_option(active_driver):
            for option in active_driver.find_elements(
                By.CSS_SELECTOR, "select option"
            ):
                label = option.text or option.get_attribute("textContent") or ""
                if wanted in normalise(label):
                    return option
            return False

        address_option = WebDriverWait(
            driver, 10, ignored_exceptions=[StaleElementReferenceException]
        ).until(matching_option)
        address_option.click()

        # Wait until a table holding a cell that mentions 'collection' exists.
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located(
                (
                    By.XPATH,
                    "//td[contains(text(), 'collection')]/ancestor::table"
                    " | //th[contains(text(), 'collection')]/ancestor::table",
                )
            )
        )

        soup = BeautifulSoup(driver.page_source, "html.parser")
        data = {"bins": []}

        # Locate the collections table through any cell mentioning 'collection'.
        marker_cell = soup.find(
            ["td", "th"],
            string=lambda text: text and "collection" in text.lower(),
        )
        table = marker_cell.find_parent("table") if marker_cell else None
        if table is None:
            return data

        for row in table.find_all("tr")[1:]:  # first row is the header
            cells = row.find_all(["td", "th"])
            if len(cells) < 2:
                continue

            bin_type = cells[0].get_text(strip=True)
            if not bin_type:
                continue

            # Columns 1 and 2 hold the next and the following collection.
            for cell in cells[1:3]:
                # Strip the PDF link label before parsing so a date that
                # shares its cell with the link is still picked up.
                text = cell.get_text(strip=True).replace("download PDF", "").strip()
                if not text or "No collection" in text:
                    continue

                parsed_date = parse_collection_date(text)
                if parsed_date is None:
                    continue

                data["bins"].append(
                    {
                        "type": bin_type,
                        # date_format is not defined in this block; it is
                        # expected to come from the project's common import.
                        "collectionDate": parsed_date.strftime(date_format),
                    }
                )

        return data

    finally:
        # Always release the browser, including on timeouts.
        driver.quit()

uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py

Lines changed: 67 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -20,88 +20,99 @@ def parse_data(self, page: str, **kwargs) -> dict:
2020
data = {"bins": []}
2121
user_paon = kwargs.get("paon")
2222
user_postcode = kwargs.get("postcode")
23+
user_uprn = kwargs.get("uprn")
2324
web_driver = kwargs.get("web_driver")
2425
headless = kwargs.get("headless")
2526
check_paon(user_paon)
2627
check_postcode(user_postcode)
2728

2829
# Create Selenium webdriver
2930
driver = create_webdriver(web_driver, headless, None, __name__)
30-
driver.get(
31-
"https://iapp.itouchvision.com/iappcollectionday/collection-day/?uuid=FA353FC74600CBE61BE409534D00A8EC09BDA3AC&lang=en"
31+
driver.get(kwargs.get("url"))
32+
33+
# Click "Check now" button
34+
check_now_button = WebDriverWait(driver, 10).until(
35+
EC.element_to_be_clickable((By.XPATH, "//a[contains(text(), 'Check now')]"))
3236
)
37+
check_now_button.click()
3338

3439
# Wait for the postcode field to appear then populate it
3540
inputElement_postcode = WebDriverWait(driver, 10).until(
3641
EC.presence_of_element_located((By.ID, "postcodeSearch"))
3742
)
3843
inputElement_postcode.send_keys(user_postcode)
3944

40-
# Click search button
41-
findAddress = WebDriverWait(driver, 10).until(
42-
EC.presence_of_element_located(
43-
(By.XPATH, '//button[@class="govuk-button mt-4"]')
44-
)
45+
# Click Find button
46+
find_button = WebDriverWait(driver, 10).until(
47+
EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Find')]"))
4548
)
46-
findAddress.click()
49+
find_button.click()
4750

48-
# Wait for the 'Select address' dropdown to appear and select option matching the house name/number
49-
WebDriverWait(driver, 10).until(
50-
EC.element_to_be_clickable(
51-
(
52-
By.XPATH,
53-
"//select[@id='addressSelect']//option[contains(., '"
54-
+ user_paon
55-
+ "')]",
56-
)
51+
# Wait for the address dropdown and select by UPRN
52+
if user_uprn:
53+
address_option = WebDriverWait(driver, 10).until(
54+
EC.element_to_be_clickable((By.XPATH, f"//option[@value='{user_uprn}']"))
5755
)
58-
).click()
59-
60-
# Wait for the collections table to appear
61-
WebDriverWait(driver, 10).until(
62-
EC.presence_of_element_located(
63-
(
64-
By.XPATH,
65-
'//div[@class="ant-row d-flex justify-content-between mb-4 mt-2 css-2rgkd4"]',
56+
address_option.click()
57+
else:
58+
# Fallback to selecting by address text
59+
address_option = WebDriverWait(driver, 10).until(
60+
EC.element_to_be_clickable(
61+
(By.XPATH, f"//select[@id='addressSelect']//option[contains(., '{user_paon}')]")
6662
)
6763
)
68-
)
69-
70-
soup = BeautifulSoup(driver.page_source, features="html.parser")
64+
address_option.click()
7165

72-
recyclingcalendar = soup.find(
73-
"div",
74-
{
75-
"class": "ant-row d-flex justify-content-between mb-4 mt-2 css-2rgkd4"
76-
},
77-
)
66+
# Wait a moment for the page to update after address selection
67+
import time
68+
time.sleep(2)
7869

79-
rows = recyclingcalendar.find_all(
80-
"div",
81-
{
82-
"class": "ant-col ant-col-xs-12 ant-col-sm-12 ant-col-md-12 ant-col-lg-12 ant-col-xl-12 css-2rgkd4"
83-
},
84-
)
70+
# Wait for collection information to appear - try multiple possible selectors
71+
try:
72+
WebDriverWait(driver, 15).until(
73+
EC.presence_of_element_located((By.XPATH, "//h2[contains(text(), 'Your next collections')]"))
74+
)
75+
except:
76+
# Alternative wait for collection data structure
77+
WebDriverWait(driver, 10).until(
78+
EC.presence_of_element_located((By.XPATH, "//div[contains(@class, 'ant-row') and contains(@class, 'd-flex')]//h3[@class='text-white']"))
79+
)
8580

81+
soup = BeautifulSoup(driver.page_source, features="html.parser")
82+
83+
# Find all collection items with the specific structure - try multiple class patterns
84+
collection_items = soup.find_all("div", class_=lambda x: x and "ant-col" in x and "ant-col-xs-12" in x)
85+
if not collection_items:
86+
# Fallback to finding items by structure
87+
collection_items = soup.find_all("div", class_=lambda x: x and "p-2" in x and "d-flex" in x and "flex-column" in x)
88+
8689
current_year = datetime.now().year
8790
current_month = datetime.now().month
8891

89-
for row in rows:
90-
BinType = row.find("h3").text
91-
collectiondate = datetime.strptime(
92-
row.find("div", {"class": "text-white fw-bold"}).text,
93-
"%A %d %B",
94-
)
95-
if (current_month > 10) and (collectiondate.month < 3):
96-
collectiondate = collectiondate.replace(year=(current_year + 1))
97-
else:
98-
collectiondate = collectiondate.replace(year=current_year)
99-
100-
dict_data = {
101-
"type": BinType,
102-
"collectionDate": collectiondate.strftime("%d/%m/%Y"),
103-
}
104-
data["bins"].append(dict_data)
92+
for item in collection_items:
93+
# Extract bin type from h3 element
94+
bin_type_elem = item.find("h3", class_="text-white")
95+
# Extract date from div with specific classes
96+
date_elem = item.find("div", class_="text-white fw-bold")
97+
98+
if bin_type_elem and date_elem:
99+
bin_type = bin_type_elem.get_text().strip()
100+
date_text = date_elem.get_text().strip()
101+
102+
try:
103+
collection_date = datetime.strptime(date_text, "%A %d %B")
104+
if (current_month > 10) and (collection_date.month < 3):
105+
collection_date = collection_date.replace(year=(current_year + 1))
106+
else:
107+
collection_date = collection_date.replace(year=current_year)
108+
109+
dict_data = {
110+
"type": bin_type,
111+
"collectionDate": collection_date.strftime("%d/%m/%Y"),
112+
}
113+
data["bins"].append(dict_data)
114+
except ValueError:
115+
continue
105116

106117
except Exception as e:
107118
# Here you can log the exception if needed

0 commit comments

Comments
 (0)