
Commit fa0ec28

fix: #1565 - BCP Council

1 parent a977d55 commit fa0ec28

File tree

3 files changed: +47 -128 lines changed

uk_bin_collection/tests/input.json

Lines changed: 1 addition & 4 deletions

```diff
@@ -102,12 +102,9 @@
     },
     "BCPCouncil": {
         "LAD24CD": "E06000058",
-        "house_number": "3 HARBOUR VIEW ROAD, POOLE, BH14 0PD",
-        "postcode": "BH14 0PD",
-        "web_driver": "http://selenium:4444",
         "skip_get_url": true,
         "uprn": "100040810214",
-        "url": "https://online.bcpcouncil.gov.uk/bindaylookup/",
+        "url": "https://bcpportal.bcpcouncil.gov.uk/checkyourbincollection",
         "wiki_name": "Bournemouth, Christchurch and Poole",
         "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
     },
```
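With the move to a direct API lookup, the fixture is driven purely by UPRN; the Selenium-era keys are removed. As a quick sanity check over the fixture (a minimal sketch, assuming it runs from the repository root), the entry can be validated like this:

```python
import json

# Load the council test fixtures (path as shown above)
with open("uk_bin_collection/tests/input.json") as f:
    councils = json.load(f)

entry = councils["BCPCouncil"]

# The API-based scraper only needs the UPRN and the new portal URL
assert entry["uprn"] == "100040810214"
assert entry["url"] == "https://bcpportal.bcpcouncil.gov.uk/checkyourbincollection"

# The keys removed in this commit should be gone
for legacy_key in ("house_number", "postcode", "web_driver"):
    assert legacy_key not in entry, f"unexpected legacy key: {legacy_key}"
```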
uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py

Lines changed: 45 additions & 120 deletions

```diff
@@ -1,15 +1,13 @@
-import json
 import time
-from datetime import datetime
-from bs4 import BeautifulSoup
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support.ui import WebDriverWait, Select
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.common.keys import Keys
+
+import requests
+from dateutil.relativedelta import relativedelta
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
+# Council implementation for Bournemouth, Christchurch and Poole
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -18,116 +16,43 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
-        postcode = kwargs.get("postcode")
-        house_number = kwargs.get("paon")
-        web_driver = kwargs.get("web_driver")
-        headless = kwargs.get("headless", True)
-
-        check_postcode(postcode)
-        check_paon(house_number)
-
-        driver = create_webdriver(web_driver, headless=headless)
-
-        try:
-            driver.get("https://bcpportal.bcpcouncil.gov.uk/checkyourbincollection/")
-
-            # Handle cookie banner first
-            try:
-                cookie_button = WebDriverWait(driver, 5).until(
-                    EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Okay')]"))
-                )
-                cookie_button.click()
-            except:
-                pass  # Cookie banner might not be present
-
-            # Wait for and enter postcode
-            postcode_input = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located((By.CSS_SELECTOR, "input[type='text']"))
-            )
-            postcode_input.clear()
-            postcode_input.send_keys(postcode)
-
-            # Click the search span element
-            search_button = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable((By.ID, "searchAddress"))
-            )
-            search_button.click()
-
-            # Wait for address dropdown
-            select_element = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located((By.TAG_NAME, "select"))
-            )
-
-            # Find and select the address containing the house number
-            address_option = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable((By.XPATH, f"//option[contains(text(), 'HARBOUR VIEW ROAD')]"))
-            )
-            address_option.click()
-
-            # Wait for bin collection results to load
-            WebDriverWait(driver, 15).until(
-                EC.presence_of_element_located((By.XPATH, "//td[contains(text(), 'collection')] | //th[contains(text(), 'collection')]"))
-            )
-
-            # Find the table containing collection data by looking for a cell with 'collection' text
-            collection_table = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located((By.XPATH, "//td[contains(text(), 'collection')]/ancestor::table | //th[contains(text(), 'collection')]/ancestor::table"))
-            )
-
-            # Parse the table data
-            soup = BeautifulSoup(driver.page_source, 'html.parser')
-            data = {"bins": []}
-
-            # Find the table containing collection information
-            collection_cell = soup.find(['td', 'th'], string=lambda text: text and 'collection' in text.lower())
-            if collection_cell:
-                table = collection_cell.find_parent('table')
-                if table:
-                    rows = table.find_all('tr')
-                    for row in rows[1:]:  # Skip header row
-                        cells = row.find_all(['td', 'th'])
-                        if len(cells) >= 2:  # At least bin type and one collection date
-                            bin_type = cells[0].get_text(strip=True)
-                            next_collection = cells[1].get_text(strip=True) if len(cells) > 1 else ""
-                            following_collection = cells[2].get_text(strip=True) if len(cells) > 2 else ""
-
-
-                            # Process next collection date
-                            if bin_type and next_collection and "No collection" not in next_collection:
-                                try:
-                                    # Try multiple date formats
-                                    for date_fmt in ["%A, %d %B %Y", "%A %d %B %Y", "%d/%m/%Y", "%d-%m-%Y", "%Y-%m-%d"]:
-                                        try:
-                                            parsed_date = datetime.strptime(next_collection, date_fmt)
-                                            data["bins"].append({
-                                                "type": bin_type,
-                                                "collectionDate": parsed_date.strftime(date_format)
-                                            })
-                                            break
-                                        except ValueError:
-                                            continue
-                                except:
-                                    continue
-
-                            # Process following collection date
-                            if bin_type and following_collection and "No collection" not in following_collection and "download PDF" not in following_collection:
-                                try:
-                                    # Clean up the following collection text (remove PDF link text)
-                                    following_collection = following_collection.replace("download PDF", "").strip()
-                                    for date_fmt in ["%A, %d %B %Y", "%A %d %B %Y", "%d/%m/%Y", "%d-%m-%Y", "%Y-%m-%d"]:
-                                        try:
-                                            parsed_date = datetime.strptime(following_collection, date_fmt)
-                                            data["bins"].append({
-                                                "type": bin_type,
-                                                "collectionDate": parsed_date.strftime(date_format)
-                                            })
-                                            break
-                                        except ValueError:
-                                            continue
-                                except:
-                                    continue
-
-            return data
-
-        finally:
-            driver.quit()
+        # Look up collections for the property's UPRN via the council's API
+        uprn = kwargs.get("uprn")
+        # usrn = kwargs.get("paon")
+        check_uprn(uprn)
+        # check_usrn(usrn)
+        bindata = {"bins": []}
+
+        # uprn = uprn.zfill(12)
+
+        API_URL = "https://prod-17.uksouth.logic.azure.com/workflows/58253d7b7d754447acf9fe5fcf76f493/triggers/manual/paths/invoke?api-version=2016-06-01&sp=%2Ftriggers%2Fmanual%2Frun&sv=1.0&sig=TAvYIUFj6dzaP90XQCm2ElY6Cd34ze05I3ba7LKTiBs"
+
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "*/*",
+            "User-Agent": "Mozilla/5.0",
+            "Referer": "https://bcpportal.bcpcouncil.gov.uk/",
+        }
+        s = requests.session()
+        data = {
+            "uprn": uprn,
+        }
+
+        r = s.post(API_URL, json=data, headers=headers)
+        r.raise_for_status()
+
+        data = r.json()
+        rows_data = data["data"]
+        for row in rows_data:
+            bin_type = row["wasteContainerUsageTypeDescription"]
+            collections = row["scheduleDateRange"]
+            for collection in collections:
+                dict_data = {
+                    "type": bin_type,
+                    "collectionDate": datetime.strptime(
+                        collection, "%Y-%m-%d"
+                    ).strftime(date_format),
+                }
+                bindata["bins"].append(dict_data)
+
+        return bindata
```
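The rewrite swaps roughly 120 lines of Selenium automation (cookie banner, postcode entry, address dropdown, table scraping) for a single POST to the Azure Logic App endpoint that backs the council's portal. Stripped of the library's plumbing, the lookup reduces to the sketch below; the endpoint URL, headers, payload, and response fields come straight from the diff, while the printed output is only illustrative:

```python
import requests
from datetime import datetime

# Azure Logic App endpoint used by the BCP portal (from the diff above)
API_URL = (
    "https://prod-17.uksouth.logic.azure.com/workflows/"
    "58253d7b7d754447acf9fe5fcf76f493/triggers/manual/paths/invoke"
    "?api-version=2016-06-01&sp=%2Ftriggers%2Fmanual%2Frun&sv=1.0"
    "&sig=TAvYIUFj6dzaP90XQCm2ElY6Cd34ze05I3ba7LKTiBs"
)

headers = {
    "Content-Type": "application/json",
    "Accept": "*/*",
    "User-Agent": "Mozilla/5.0",
    "Referer": "https://bcpportal.bcpcouncil.gov.uk/",
}

# UPRN from the test fixture; substitute your own property's UPRN
response = requests.post(API_URL, json={"uprn": "100040810214"}, headers=headers)
response.raise_for_status()

# Each row is one container type with a list of ISO-format collection dates
for row in response.json()["data"]:
    bin_type = row["wasteContainerUsageTypeDescription"]
    for collection in row["scheduleDateRange"]:
        date = datetime.strptime(collection, "%Y-%m-%d")
        print(f"{bin_type}: {date:%d/%m/%Y}")
```

Because the endpoint returns ISO-format dates in structured JSON, the old multi-format date parsing and table traversal become unnecessary, which is where most of the 120 deleted lines went.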

wiki/Councils.md

Lines changed: 1 addition & 4 deletions

````diff
@@ -482,14 +482,11 @@ Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/searc
 
 ### Bournemouth, Christchurch and Poole
 ```commandline
-python collect_data.py BCPCouncil https://online.bcpcouncil.gov.uk/bindaylookup/ -s -u XXXXXXXX -p "XXXX XXX" -n XX -w http://HOST:PORT/
+python collect_data.py BCPCouncil https://bcpportal.bcpcouncil.gov.uk/checkyourbincollection -s -u XXXXXXXX
 ```
 Additional parameters:
 - `-s` - skip get URL
 - `-u` - UPRN
-- `-p` - postcode
-- `-n` - house number
-- `-w` - remote Selenium web driver URL (required for Home Assistant)
 
 Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN.
 
````
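For anyone driving the library from Python rather than the documented CLI, the updated class can be called directly. A minimal sketch, assuming the module lives at the repository's usual councils path (uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py) and reusing the UPRN from the test fixture:

```python
from uk_bin_collection.uk_bin_collection.councils.BCPCouncil import CouncilClass

# page is unused by the new implementation (skip_get_url is set),
# so an empty string is fine; only the uprn keyword matters
council = CouncilClass()
data = council.parse_data("", uprn="100040810214")

for bin_entry in data["bins"]:
    print(bin_entry["type"], bin_entry["collectionDate"])
```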
