Commit 94c255a

Merge pull request #1602 from m26dvd/master
fix: Council Fix Pack - September 2025
2 parents aa99936 + d5516f6

14 files changed: +270 −399 lines

uk_bin_collection/tests/input.json

Lines changed: 3 additions & 6 deletions
@@ -102,12 +102,9 @@
     },
     "BCPCouncil": {
         "LAD24CD": "E06000058",
-        "house_number": "3 HARBOUR VIEW ROAD, POOLE, BH14 0PD",
-        "postcode": "BH14 0PD",
-        "web_driver": "http://selenium:4444",
         "skip_get_url": true,
         "uprn": "100040810214",
-        "url": "https://online.bcpcouncil.gov.uk/bindaylookup/",
+        "url": "https://bcpportal.bcpcouncil.gov.uk/checkyourbincollection",
         "wiki_name": "Bournemouth, Christchurch and Poole",
         "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
     },
@@ -888,7 +885,7 @@
     "ErewashBoroughCouncil": {
         "skip_get_url": true,
         "uprn": "10003582028",
-        "url": "https://map.erewash.gov.uk/isharelive.web/myerewash.aspx",
+        "url": "https://www.erewash.gov.uk",
         "wiki_name": "Erewash",
         "wiki_note": "Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search).",
         "LAD24CD": "E07000036"
@@ -995,7 +992,7 @@
     },
     "GlasgowCityCouncil": {
         "uprn": "906700034497",
-        "url": "https://onlineservices.glasgow.gov.uk/forms/RefuseAndRecyclingWebApplication/AddressSearch.aspx",
+        "url": "https://onlineservices.glasgow.gov.uk/forms/refuseandrecyclingcalendar/AddressSearch.aspx",
         "skip_get_url": true,
         "wiki_name": "Glasgow City",
         "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN.",
uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py

Lines changed: 45 additions & 120 deletions
@@ -1,15 +1,13 @@
-import json
 import time
-from datetime import datetime
-from bs4 import BeautifulSoup
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support.ui import WebDriverWait, Select
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.common.keys import Keys
+
+import requests
+from dateutil.relativedelta import relativedelta
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
+# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -18,116 +16,43 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
-        postcode = kwargs.get("postcode")
-        house_number = kwargs.get("paon")
-        web_driver = kwargs.get("web_driver")
-        headless = kwargs.get("headless", True)
-
-        check_postcode(postcode)
-        check_paon(house_number)
-
-        driver = create_webdriver(web_driver, headless=headless)
-
-        try:
-            driver.get("https://bcpportal.bcpcouncil.gov.uk/checkyourbincollection/")
-
-            # Handle cookie banner first
-            try:
-                cookie_button = WebDriverWait(driver, 5).until(
-                    EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Okay')]"))
-                )
-                cookie_button.click()
-            except:
-                pass  # Cookie banner might not be present
-
-            # Wait for and enter postcode
-            postcode_input = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located((By.CSS_SELECTOR, "input[type='text']"))
-            )
-            postcode_input.clear()
-            postcode_input.send_keys(postcode)
-
-            # Click the search span element
-            search_button = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable((By.ID, "searchAddress"))
-            )
-            search_button.click()
-
-            # Wait for address dropdown
-            select_element = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located((By.TAG_NAME, "select"))
-            )
-
-            # Find and select the address containing the house number
-            address_option = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable((By.XPATH, f"//option[contains(text(), 'HARBOUR VIEW ROAD')]"))
-            )
-            address_option.click()
-
-            # Wait for bin collection results to load
-            WebDriverWait(driver, 15).until(
-                EC.presence_of_element_located((By.XPATH, "//td[contains(text(), 'collection')] | //th[contains(text(), 'collection')]"))
-            )
-
-            # Find the table containing collection data by looking for a cell with 'collection' text
-            collection_table = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located((By.XPATH, "//td[contains(text(), 'collection')]/ancestor::table | //th[contains(text(), 'collection')]/ancestor::table"))
-            )
-
-            # Parse the table data
-            soup = BeautifulSoup(driver.page_source, 'html.parser')
-            data = {"bins": []}
-
-            # Find the table containing collection information
-            collection_cell = soup.find(['td', 'th'], string=lambda text: text and 'collection' in text.lower())
-            if collection_cell:
-                table = collection_cell.find_parent('table')
-                if table:
-                    rows = table.find_all('tr')
-                    for row in rows[1:]:  # Skip header row
-                        cells = row.find_all(['td', 'th'])
-                        if len(cells) >= 2:  # At least bin type and one collection date
-                            bin_type = cells[0].get_text(strip=True)
-                            next_collection = cells[1].get_text(strip=True) if len(cells) > 1 else ""
-                            following_collection = cells[2].get_text(strip=True) if len(cells) > 2 else ""
-
-
-                            # Process next collection date
-                            if bin_type and next_collection and "No collection" not in next_collection:
-                                try:
-                                    # Try multiple date formats
-                                    for date_fmt in ["%A, %d %B %Y", "%A %d %B %Y", "%d/%m/%Y", "%d-%m-%Y", "%Y-%m-%d"]:
-                                        try:
-                                            parsed_date = datetime.strptime(next_collection, date_fmt)
-                                            data["bins"].append({
-                                                "type": bin_type,
-                                                "collectionDate": parsed_date.strftime(date_format)
-                                            })
-                                            break
-                                        except ValueError:
-                                            continue
-                                except:
-                                    continue
-
-                            # Process following collection date
-                            if bin_type and following_collection and "No collection" not in following_collection and "download PDF" not in following_collection:
-                                try:
-                                    # Clean up the following collection text (remove PDF link text)
-                                    following_collection = following_collection.replace("download PDF", "").strip()
-                                    for date_fmt in ["%A, %d %B %Y", "%A %d %B %Y", "%d/%m/%Y", "%d-%m-%Y", "%Y-%m-%d"]:
-                                        try:
-                                            parsed_date = datetime.strptime(following_collection, date_fmt)
-                                            data["bins"].append({
-                                                "type": bin_type,
-                                                "collectionDate": parsed_date.strftime(date_format)
-                                            })
-                                            break
-                                        except ValueError:
-                                            continue
-                                except:
-                                    continue
-
-            return data
-
-        finally:
-            driver.quit()
+        # Make a BS4 object
+        uprn = kwargs.get("uprn")
+        # usrn = kwargs.get("paon")
+        check_uprn(uprn)
+        # check_usrn(usrn)
+        bindata = {"bins": []}
+
+        # uprn = uprn.zfill(12)
+
+        API_URL = "https://prod-17.uksouth.logic.azure.com/workflows/58253d7b7d754447acf9fe5fcf76f493/triggers/manual/paths/invoke?api-version=2016-06-01&sp=%2Ftriggers%2Fmanual%2Frun&sv=1.0&sig=TAvYIUFj6dzaP90XQCm2ElY6Cd34ze05I3ba7LKTiBs"
+
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "*/*",
+            "User-Agent": "Mozilla/5.0",
+            "Referer": "https://bcpportal.bcpcouncil.gov.uk/",
+        }
+        s = requests.session()
+        data = {
+            "uprn": uprn,
+        }
+
+        r = s.post(API_URL, json=data, headers=headers)
+        r.raise_for_status()
+
+        data = r.json()
+        rows_data = data["data"]
+        for row in rows_data:
+            bin_type = row["wasteContainerUsageTypeDescription"]
+            collections = row["scheduleDateRange"]
+            for collection in collections:
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": datetime.strptime(
                        collection, "%Y-%m-%d"
                    ).strftime(date_format),
+                }
+                bindata["bins"].append(dict_data)
+
+        return bindata
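
The rewrite drops the Selenium flow entirely: the portal's front end is backed by a signed Azure Logic Apps endpoint, and the collector now POSTs the UPRN to it directly. A minimal standalone sketch of the same call, using only values visible in the diff above (the sample UPRN is the test value from input.json):

import requests

# Signed Logic Apps trigger URL, copied verbatim from the diff above
API_URL = (
    "https://prod-17.uksouth.logic.azure.com/workflows/"
    "58253d7b7d754447acf9fe5fcf76f493/triggers/manual/paths/invoke"
    "?api-version=2016-06-01&sp=%2Ftriggers%2Fmanual%2Frun&sv=1.0"
    "&sig=TAvYIUFj6dzaP90XQCm2ElY6Cd34ze05I3ba7LKTiBs"
)

response = requests.post(
    API_URL,
    json={"uprn": "100040810214"},  # test UPRN from input.json
    headers={"Referer": "https://bcpportal.bcpcouncil.gov.uk/"},
    timeout=30,
)
response.raise_for_status()

# Each row pairs a container description with a list of ISO-format dates
for row in response.json()["data"]:
    print(row["wasteContainerUsageTypeDescription"], row["scheduleDateRange"])

Note that requests sets the JSON Content-Type itself when json= is used, so only the Referer from the module's header set is reproduced here.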

uk_bin_collection/uk_bin_collection/councils/BasingstokeCouncil.py

Lines changed: 4 additions & 1 deletion
@@ -1,6 +1,8 @@
-from bs4 import BeautifulSoup
 from datetime import datetime
+
 import requests
+from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -10,6 +12,7 @@
     "glass": "rteelem_ctl03_pnlCollections_Glass",
     # Garden waste data is only returned if the property is subscribed to the Garden Waste service
     "garden": "rteelem_ctl03_pnlCollections_GardenWaste",
+    "food": "rteelem_ctl03_pnlCollections_Food",
 }
 
 
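The change itself is one line: a new panel ID for food waste. The map pairs each waste stream with the ID of the server-rendered panel holding its dates, so supporting a new stream is just a new entry. A rough sketch of how such an ID map gets consumed (the element IDs come from the diff; the toy HTML and lookup loop are illustrative, not the module's actual request/parse code):

from bs4 import BeautifulSoup

# Panel IDs from the module's map, including the new food entry
PANEL_IDS = {
    "glass": "rteelem_ctl03_pnlCollections_Glass",
    "garden": "rteelem_ctl03_pnlCollections_GardenWaste",
    "food": "rteelem_ctl03_pnlCollections_Food",
}

# Stand-in for the council's response page
html = '<div id="rteelem_ctl03_pnlCollections_Food">Tuesday 30 September 2025</div>'
soup = BeautifulSoup(html, "html.parser")

for waste_type, panel_id in PANEL_IDS.items():
    panel = soup.find(id=panel_id)
    if panel:  # a panel is absent when the property lacks that service
        print(waste_type, panel.get_text(strip=True))
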
uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py

Lines changed: 15 additions & 36 deletions
@@ -72,7 +72,9 @@ def parse_data(self, page: str, **kwargs) -> dict:
                     break
 
             if not found:
-                raise Exception(f"Address containing '{user_paon}' not found in dropdown options")
+                raise Exception(
+                    f"Address containing '{user_paon}' not found in dropdown options"
+                )
 
             submit_btn = wait.until(
                 EC.presence_of_element_located(
@@ -84,7 +86,7 @@ def parse_data(self, page: str, **kwargs) -> dict:
 
             results = wait.until(
                 EC.presence_of_element_located(
-                    (By.XPATH, f'//span[contains(@class,"collection-sub")]')
+                    (By.XPATH, f'//div[contains(@class,"mx-name-listView1")]')
                 )
             )
 
@@ -96,44 +98,21 @@ def parse_data(self, page: str, **kwargs) -> dict:
             current_date = datetime.now()
 
             # Find all elements with class starting with 'mx-name-index-'
-            bins = soup.find_all(class_=lambda x: x and x.startswith("mx-name-index-"))
+            bin_view = soup.find(class_="mx-name-listView1")
+            bins = bin_view.find_all(
+                class_=lambda x: x and x.startswith("mx-name-index-")
+            )
 
             for bin_item in bins:
-                bin_type = bin_item.find(class_="collection-main").text.strip()
-                day_of_week_elements = bin_item.find_all(class_="collection-header")
-                bin_date = None
-
-                for elem in day_of_week_elements:
-                    if (
-                        elem.text.strip() != bin_type
-                    ):  # Avoid taking the bin type as the date
-                        next_sibling = elem.find_next_sibling()
-                        if next_sibling:
-                            bin_date_str = next_sibling.text.strip()
-                            try:
-                                # Try parsing the date string in the format 'dd Month' (e.g., '30 Dec', '5 January')
-                                bin_date = datetime.strptime(bin_date_str, "%d %b")
-                            except ValueError:
-                                try:
-                                    # If the above format fails, try 'dd MonthName' (e.g., '30 December', '5 January')
-                                    bin_date = datetime.strptime(bin_date_str, "%d %B")
-                                except ValueError:
-                                    pass
-
-                    if bin_date:
-                        # Set the year based on the logic provided
-                        if bin_date.month < current_date.month:
-                            bin_date = bin_date.replace(
-                                year=current_date.year + 1
-                            )
-                        else:
-                            bin_date = bin_date.replace(year=current_date.year)
-                        # Format the date to the desired format
-                        bin_date = bin_date.strftime("%d/%m/%Y")
-                        break
+                bin_type = bin_item.find(class_="mx-name-text31").text.strip()
+
+                bin_date_str = bin_item.find(class_="mx-name-text29").text.strip()
+
+                bin_date = datetime.strptime(bin_date_str, "%d %B %Y")
+                bin_date = bin_date.strftime(date_format)
 
                 dict_data = {"type": bin_type, "collectionDate": bin_date}
                 data["bins"].append(dict_data)
-            print(data)
         except Exception as e:
             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
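
Two things changed here: the scraper now scopes its search to the mx-name-listView1 container and reads type and date from the mx-name-text31 and mx-name-text29 elements, and the old year-guessing heuristic is gone because the portal now renders full dates ("%d %B %Y"). A self-contained sketch of the new extraction (the class names come from the diff; the HTML fragment's shape is assumed for demonstration):

from datetime import datetime

from bs4 import BeautifulSoup

# Illustrative fragment shaped like the Mendix list view the new selectors target
html = """
<div class="mx-name-listView1">
  <div class="mx-name-index-0">
    <span class="mx-name-text31">Rubbish</span>
    <span class="mx-name-text29">30 September 2025</span>
  </div>
</div>
"""

soup = BeautifulSoup(html, "html.parser")
bin_view = soup.find(class_="mx-name-listView1")
for bin_item in bin_view.find_all(class_=lambda x: x and x.startswith("mx-name-index-")):
    bin_type = bin_item.find(class_="mx-name-text31").text.strip()
    bin_date = datetime.strptime(
        bin_item.find(class_="mx-name-text29").text.strip(), "%d %B %Y"
    )
    print(bin_type, bin_date.strftime("%d/%m/%Y"))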

uk_bin_collection/uk_bin_collection/councils/CastlepointDistrictCouncil.py

Lines changed: 55 additions & 24 deletions
@@ -26,7 +26,9 @@ def parse_data(self, page: str, **kwargs) -> dict:
         uprn = kwargs.get("uprn")
         check_uprn(uprn)
 
-        post_url = "https://apps.castlepoint.gov.uk/cpapps/index.cfm?fa=myStreet.displayDetails"
+        base_url = "https://apps.castlepoint.gov.uk/cpapps/"
+
+        post_url = f"{base_url}index.cfm?fa=myStreet.displayDetails"
         post_header_str = (
             "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,"
             "image/apng,"
@@ -51,31 +53,60 @@ def parse_data(self, page: str, **kwargs) -> dict:
         soup = BeautifulSoup(post_response.text, features="html.parser")
         soup.prettify()
 
+        calMonthNext = f"{base_url}{soup.select_one("div.calMonthNext a")["href"]}"
+        nextmonth_response = requests.post(
+            calMonthNext, headers=post_headers, data=form_data, verify=False
+        )
+        soup_nextmonth = BeautifulSoup(nextmonth_response.text, features="html.parser")
+        soup_nextmonth.prettify()
+
         data = {"bins": []}
-        collection_tuple = []
 
-        calendar = soup.find("table", class_="calendar")
-        month = datetime.strptime(
-            soup.find("div", class_="calMonthCurrent").get_text(), "[%b]"
-        ).strftime("%m")
-        year = datetime.strptime(
-            soup.find("h1").get_text(), "About my Street - %B %Y"
-        ).strftime("%Y")
-
-        pink_days = [
-            day.get_text().strip() for day in calendar.find_all("td", class_="pink")
-        ]
-        black_days = [
-            day.get_text().strip() for day in calendar.find_all("td", class_="normal")
-        ]
-
-        for day in pink_days:
-            collection_date = datetime(year=int(year), month=int(month), day=int(day))
-            collection_tuple.append(("Pink collection", collection_date))
-
-        for day in black_days:
-            collection_date = datetime(year=int(year), month=int(month), day=int(day))
-            collection_tuple.append(("Normal collection", collection_date))
+        def parse_calendar_month(soup_one_month):
+            out = []
+
+            calendar = soup_one_month.find("table", class_="calendar")
+            if not calendar:
+                return out  # be robust
+
+            # e.g. "[Aug]"
+            month_txt = soup_one_month.find("div", class_="calMonthCurrent").get_text(
+                strip=True
+            )
+            month = datetime.strptime(month_txt, "[%b]").strftime("%m")
+
+            # e.g. "About my Street - August 2025"
+            year_txt = soup_one_month.find("h1").get_text(strip=True)
+            year = datetime.strptime(year_txt, "About my Street - %B %Y").strftime("%Y")
+
+            pink_days = [
+                td.get_text(strip=True) for td in calendar.find_all("td", class_="pink")
+            ]
+            black_days = [
+                td.get_text(strip=True)
+                for td in calendar.find_all("td", class_="normal")
+            ]
+
+            for day in pink_days:
+                out.append(
+                    (
+                        "Pink collection",
+                        datetime(year=int(year), month=int(month), day=int(day)),
+                    )
+                )
+            for day in black_days:
+                out.append(
+                    (
+                        "Normal collection",
+                        datetime(year=int(year), month=int(month), day=int(day)),
+                    )
+                )
+
+            return out
+
+        collection_tuple = []
+        for s in (soup, soup_nextmonth):
+            collection_tuple.extend(parse_calendar_month(s))
 
         ordered_data = sorted(collection_tuple, key=lambda x: x[1])
 
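The rework wraps the calendar parsing in a parse_calendar_month helper and runs it over both the current page and the page behind the calMonthNext link, so collections that fall early in the next month are no longer missed. The same parsing logic, sketched against a toy page (the markup shape is inferred from the selectors above, not captured from the live site):

from datetime import datetime

from bs4 import BeautifulSoup

# Toy page in the shape parse_calendar_month expects
html = """
<h1>About my Street - August 2025</h1>
<div class="calMonthCurrent">[Aug]</div>
<table class="calendar">
  <tr><td class="pink">4</td><td class="normal">11</td></tr>
</table>
"""

soup = BeautifulSoup(html, "html.parser")
month = datetime.strptime(
    soup.find("div", class_="calMonthCurrent").get_text(strip=True), "[%b]"
).strftime("%m")
year = datetime.strptime(
    soup.find("h1").get_text(strip=True), "About my Street - %B %Y"
).strftime("%Y")

for css_class, label in (("pink", "Pink collection"), ("normal", "Normal collection")):
    for td in soup.find_all("td", class_=css_class):
        print(label, datetime(int(year), int(month), int(td.get_text(strip=True))))
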