
Commit c7d073e

Merge pull request #1594 from robbrad/aug_25_release2
Aug 25 release2
2 parents: a2541f1 + b1e09dd · commit c7d073e

14 files changed: +1378 -935 lines

uk_bin_collection/tests/input.json

Lines changed: 3 additions & 6 deletions
@@ -253,7 +253,7 @@
         "postcode": "BL1 5PQ",
         "skip_get_url": true,
         "uprn": "100010886936",
-        "url": "https://carehomes.bolton.gov.uk/bins.aspx",
+        "url": "https://web.bolton.gov.uk/bins.aspx",
         "web_driver": "http://selenium:4444",
         "wiki_name": "Bolton",
         "wiki_note": "To get the UPRN, you will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search). Previously required a single field that was UPRN and full address; now requires UPRN and postcode as separate fields.",
@@ -377,13 +377,10 @@
         "LAD24CD": "E07000172"
     },
     "BuckinghamshireCouncil": {
-        "house_number": "The Ridings, Magpie Lane, Loudwater, High Wycombe, HP13 7BA",
-        "postcode": "HP13 7BA",
         "uprn": "100081093078",
         "url": "https://www.buckinghamshire.gov.uk/waste-and-recycling/find-out-when-its-your-bin-collection/",
-        "web_driver": "http://selenium:4444",
         "wiki_name": "Buckinghamshire",
-        "wiki_note": "Pass the house name/number and postcode in their respective arguments, both wrapped in quotes.",
+        "wiki_note": "Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search).",
         "LAD24CD": "E06000060"
     },
     "BurnleyBoroughCouncil": {
@@ -1446,7 +1443,7 @@
         "house_number": "71",
         "postcode": "ME16 8BT",
         "url": "https://my.maidstone.gov.uk/service/Find-your-bin-day",
-        "web_driver": "http://selenium:4444",
+        "web_driver": "http://selenium:4444",
         "wiki_name": "Maidstone",
         "wiki_note": "Pass the house number and postcode in their respective parameters. This parser requires a Selenium webdriver.",
         "LAD24CD": "E07000110"

uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py

Lines changed: 1 addition & 1 deletion
@@ -35,7 +35,7 @@ def parse_data(self, page: str, **kwargs) -> dict:
         data = {"bins": []}
 
         # Get our initial session running
-        page = "https://carehomes.bolton.gov.uk/bins.aspx"
+        page = "https://web.bolton.gov.uk/bins.aspx"
 
         driver = create_webdriver(web_driver, headless, None, __name__)
         driver.get(page)

uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py

Lines changed: 75 additions & 100 deletions
@@ -1,11 +1,26 @@
-from bs4 import BeautifulSoup
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.support.wait import WebDriverWait
+import json
+from dataclasses import asdict, dataclass
+from typing import Literal
 
-from uk_bin_collection.uk_bin_collection.common import *
+import requests
+from cryptography.hazmat.backends import default_backend
+from cryptography.hazmat.primitives import padding
+from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
+
+from uk_bin_collection.uk_bin_collection.common import check_uprn
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
+key_hex = "F57E76482EE3DC3336495DEDEEF3962671B054FE353E815145E29C5689F72FEC"
+iv_hex = "2CBF4FC35C69B82362D393A4F0B9971A"
+
+
+@dataclass
+class BucksInput:
+    P_CLIENT_ID: Literal[152]
+    P_COUNCIL_ID: Literal[34505]
+    P_LANG_CODE: Literal["EN"]
+    P_UPRN: str
+
 
 class CouncilClass(AbstractGetBinDataClass):
     """
@@ -14,113 +29,73 @@ class CouncilClass(AbstractGetBinDataClass):
     implementation.
     """
 
-    def parse_data(self, page: str, **kwargs) -> dict:
-        driver = None
+    def encode_body(self, bucks_input: BucksInput):
+        key = bytes.fromhex(key_hex)
+        iv = bytes.fromhex(iv_hex)
+
+        json_data = json.dumps(asdict(bucks_input))
+        data_bytes = json_data.encode("utf-8")
+
+        padder = padding.PKCS7(128).padder()
+        padded_data = padder.update(data_bytes) + padder.finalize()
+
+        backend = default_backend()
+        cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=backend)
+        encryptor = cipher.encryptor()
+        ciphertext = encryptor.update(padded_data) + encryptor.finalize()
+
+        return ciphertext.hex()
+
+    def decode_response(self, hex_input: str):
+
+        key = bytes.fromhex(key_hex)
+        iv = bytes.fromhex(iv_hex)
+        ciphertext = bytes.fromhex(hex_input)
+
+        backend = default_backend()
+        cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=backend)
+        decryptor = cipher.decryptor()
+        decrypted_padded = decryptor.update(ciphertext) + decryptor.finalize()
+
+        unpadder = padding.PKCS7(128).unpadder()
+        plaintext_bytes = unpadder.update(decrypted_padded) + unpadder.finalize()
+        plaintext = plaintext_bytes.decode("utf-8")
+
+        return json.loads(plaintext)
+
+    def parse_data(self, _: str, **kwargs) -> dict:
         try:
-            data = {"bins": []}
-            user_paon = kwargs.get("paon")
-            user_postcode = kwargs.get("postcode")
-            user_uprn = kwargs.get("uprn")
-            web_driver = kwargs.get("web_driver")
-            headless = kwargs.get("headless")
-            check_paon(user_paon)
-            check_postcode(user_postcode)
-
-            # Create Selenium webdriver
-            driver = create_webdriver(web_driver, headless, None, __name__)
-            driver.get(kwargs.get("url"))
-
-            # Click "Check now" button
-            check_now_button = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable((By.XPATH, "//a[contains(text(), 'Check now')]"))
+            user_uprn: str = kwargs.get("uprn") or ""
+            check_uprn(user_uprn)
+            bucks_input = BucksInput(
+                P_CLIENT_ID=152, P_COUNCIL_ID=34505, P_LANG_CODE="EN", P_UPRN=user_uprn
             )
-            check_now_button.click()
 
-            # Wait for the postcode field to appear then populate it
-            inputElement_postcode = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located((By.ID, "postcodeSearch"))
-            )
-            inputElement_postcode.send_keys(user_postcode)
+            encoded_input = self.encode_body(bucks_input)
 
-            # Click Find button
-            find_button = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Find')]"))
+            session = requests.Session()
+            response = session.post(
+                "https://itouchvision.app/portal/itouchvision/kmbd/collectionDay",
+                data=encoded_input,
            )
-            find_button.click()
-
-            # Wait for the address dropdown and select by UPRN
-            if user_uprn:
-                address_option = WebDriverWait(driver, 10).until(
-                    EC.element_to_be_clickable((By.XPATH, f"//option[@value='{user_uprn}']"))
-                )
-                address_option.click()
-            else:
-                # Fallback to selecting by address text
-                address_option = WebDriverWait(driver, 10).until(
-                    EC.element_to_be_clickable(
-                        (By.XPATH, f"//select[@id='addressSelect']//option[contains(., '{user_paon}')]")
-                    )
-                )
-                address_option.click()
 
-            # Wait a moment for the page to update after address selection
-            import time
-            time.sleep(2)
+            output = response.text
 
-            # Wait for collection information to appear - try multiple possible selectors
-            try:
-                WebDriverWait(driver, 15).until(
-                    EC.presence_of_element_located((By.XPATH, "//h2[contains(text(), 'Your next collections')]"))
-                )
-            except:
-                # Alternative wait for collection data structure
-                WebDriverWait(driver, 10).until(
-                    EC.presence_of_element_located((By.XPATH, "//div[contains(@class, 'ant-row') and contains(@class, 'd-flex')]//h3[@class='text-white']"))
+            decoded_bins = self.decode_response(output)
+            data: dict[str, list[dict[str, str]]] = {}
+            data["bins"] = list(
+                map(
+                    lambda a: {
+                        "type": a["binType"],
+                        "collectionDate": a["collectionDay"].replace("-", "/"),
+                    },
+                    decoded_bins["collectionDay"],
                 )
-
-            soup = BeautifulSoup(driver.page_source, features="html.parser")
-
-            # Find all collection items with the specific structure - try multiple class patterns
-            collection_items = soup.find_all("div", class_=lambda x: x and "ant-col" in x and "ant-col-xs-12" in x)
-            if not collection_items:
-                # Fallback to finding items by structure
-                collection_items = soup.find_all("div", class_=lambda x: x and "p-2" in x and "d-flex" in x and "flex-column" in x)
-
-            current_year = datetime.now().year
-            current_month = datetime.now().month
-
-            for item in collection_items:
-                # Extract bin type from h3 element
-                bin_type_elem = item.find("h3", class_="text-white")
-                # Extract date from div with specific classes
-                date_elem = item.find("div", class_="text-white fw-bold")
-
-                if bin_type_elem and date_elem:
-                    bin_type = bin_type_elem.get_text().strip()
-                    date_text = date_elem.get_text().strip()
-
-                    try:
-                        collection_date = datetime.strptime(date_text, "%A %d %B")
-                        if (current_month > 10) and (collection_date.month < 3):
-                            collection_date = collection_date.replace(year=(current_year + 1))
-                        else:
-                            collection_date = collection_date.replace(year=current_year)
-
-                        dict_data = {
-                            "type": bin_type,
-                            "collectionDate": collection_date.strftime("%d/%m/%Y"),
-                        }
-                        data["bins"].append(dict_data)
-                    except ValueError:
-                        continue
+            )
 
         except Exception as e:
             # Here you can log the exception if needed
             print(f"An error occurred: {e}")
             # Optionally, re-raise the exception if you want it to propagate
             raise
-        finally:
-            # This block ensures that the driver is closed regardless of an exception
-            if driver:
-                driver.quit()
         return data
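
A note on the mechanism above: rather than driving the Buckinghamshire site with Selenium, the parser now sends the iTouchVision collectionDay endpoint an AES-256-CBC encrypted, PKCS7-padded, hex-encoded JSON body and decrypts the hex response the same way. Below is a minimal standalone sketch of that encrypt/decrypt round-trip, with no HTTP call; the key/IV constants and the sample UPRN come from the diffs above, while encrypt_payload/decrypt_payload are illustrative names, not part of the codebase.

# Minimal sketch of the AES-CBC round-trip used by the new Buckinghamshire
# parser (key/IV from the diff above; helper names are illustrative only).
import json

from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

KEY = bytes.fromhex(
    "F57E76482EE3DC3336495DEDEEF3962671B054FE353E815145E29C5689F72FEC"
)  # 32 bytes, so AES-256
IV = bytes.fromhex("2CBF4FC35C69B82362D393A4F0B9971A")  # 16-byte CBC IV


def encrypt_payload(payload: dict) -> str:
    """JSON-encode, PKCS7-pad, AES-CBC encrypt, return hex."""
    raw = json.dumps(payload).encode("utf-8")
    padder = padding.PKCS7(128).padder()
    padded = padder.update(raw) + padder.finalize()
    encryptor = Cipher(
        algorithms.AES(KEY), modes.CBC(IV), backend=default_backend()
    ).encryptor()
    return (encryptor.update(padded) + encryptor.finalize()).hex()


def decrypt_payload(hex_text: str) -> dict:
    """Reverse of encrypt_payload: unhex, decrypt, unpad, JSON-decode."""
    decryptor = Cipher(
        algorithms.AES(KEY), modes.CBC(IV), backend=default_backend()
    ).decryptor()
    padded = decryptor.update(bytes.fromhex(hex_text)) + decryptor.finalize()
    unpadder = padding.PKCS7(128).unpadder()
    raw = unpadder.update(padded) + unpadder.finalize()
    return json.loads(raw.decode("utf-8"))


body = {"P_CLIENT_ID": 152, "P_COUNCIL_ID": 34505, "P_LANG_CODE": "EN", "P_UPRN": "100081093078"}
assert decrypt_payload(encrypt_payload(body)) == body  # round-trips cleanly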

uk_bin_collection/uk_bin_collection/councils/EastHertsCouncil.py

Lines changed: 82 additions & 24 deletions
@@ -1,11 +1,13 @@
-import json
+import time
+
 import requests
-from datetime import datetime
+from dateutil.relativedelta import relativedelta
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
+# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -14,28 +16,84 @@
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
-        user_uprn = kwargs.get("uprn")
-        check_uprn(user_uprn)
+        # Make a BS4 object
+        uprn = kwargs.get("uprn")
+        # usrn = kwargs.get("paon")
+        check_uprn(uprn)
+        # check_usrn(usrn)
         bindata = {"bins": []}
-
-        # Make API request
-        api_url = f"https://east-herts.co.uk/api/services/{user_uprn}"
-        response = requests.get(api_url)
-        response.raise_for_status()
-
-        data = response.json()
-        today = datetime.now().date()
-
-        for service in data.get("services", []):
-            collection_date_str = service.get("collectionDate")
-            if collection_date_str:
-                collection_date = datetime.strptime(collection_date_str, "%Y-%m-%d").date()
-                # Only include future dates
-                if collection_date >= today:
-                    dict_data = {
-                        "type": service.get("binType", ""),
-                        "collectionDate": collection_date.strftime("%d/%m/%Y"),
+
+        # uprn = uprn.zfill(12)
+
+        SESSION_URL = "https://eastherts-self.achieveservice.com/authapi/isauthenticated?uri=https%253A%252F%252Feastherts-self.achieveservice.com%252FAchieveForms%252F%253Fmode%253Dfill%2526consentMessage%253Dyes%2526form_uri%253Dsandbox-publish%253A%252F%252FAF-Process-98782935-6101-4962-9a55-5923e76057b6%252FAF-Stage-dcd0ec18-dfb4-496a-a266-bd8fadaa28a7%252Fdefinition.json%2526process%253D1%2526process_uri%253Dsandbox-processes%253A%252F%252FAF-Process-98782935-6101-4962-9a55-5923e76057b6%2526process_id%253DAF-Process-98782935-6101-4962-9a55-5923e76057b6&hostname=eastherts-self.achieveservice.com&withCredentials=true"
+
+        API_URL = "https://eastherts-self.achieveservice.com/apibroker/runLookup"
+
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "*/*",
+            "User-Agent": "Mozilla/5.0",
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": "https://eastherts-self.achieveservice.com/fillform/?iframe_id=fillform-frame-1&db_id=",
+        }
+        s = requests.session()
+        r = s.get(SESSION_URL)
+        r.raise_for_status()
+        session_data = r.json()
+        sid = session_data["auth-session"]
+        params = {
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+
+        params = {
+            "id": "683d9ff0e299d",
+            "repeat_against": "",
+            "noRetry": "true",
+            "getOnlyTokens": "undefined",
+            "log_id": "",
+            "app_name": "AF-Renderer::Self",
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+
+        data = {
+            "formValues": {
+                "Collection Days": {
+                    "inputUPRN": {
+                        "value": uprn,
                     }
-                    bindata["bins"].append(dict_data)
-
+                },
+            }
+        }
+
+        r = s.post(API_URL, json=data, headers=headers, params=params)
+        r.raise_for_status()
+
+        data = r.json()
+        rows_data = data["integration"]["transformed"]["rows_data"]["0"]
+        if not isinstance(rows_data, dict):
+            raise ValueError("Invalid data returned from API")
+
+        # Extract each service's relevant details for the bin schedule
+        for key, value in rows_data.items():
+            if key.endswith("NextDate"):
+                BinType = key.replace("NextDate", "ServiceName")
+                for key2, value2 in rows_data.items():
+                    if key2 == BinType:
+                        BinType = value2
+                next_collection = datetime.strptime(
+                    remove_ordinal_indicator_from_date_string(value), "%A %d %B"
+                ).replace(year=datetime.now().year)
+                if datetime.now().month == 12 and next_collection.month == 1:
+                    next_collection = next_collection + relativedelta(years=1)
+
+                dict_data = {
+                    "type": BinType,
+                    "collectionDate": next_collection.strftime(date_format),
+                }
+                bindata["bins"].append(dict_data)
+
         return bindata
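
One subtlety in the new East Herts parser above: the lookup returns day-and-month-only strings (parsed with "%A %d %B"), so the year has to be inferred, and a January date seen in December must roll over to the next year. Below is a small standalone sketch of that logic; strip_ordinal stands in for the project's remove_ordinal_indicator_from_date_string helper, and the "%d/%m/%Y" output format is an assumption standing in for the project's date_format constant.

# Standalone sketch of the year-rollover handling above. strip_ordinal is a
# stand-in for the project's remove_ordinal_indicator_from_date_string helper,
# and "%d/%m/%Y" is assumed to match the project's date_format constant.
import re
from datetime import datetime

from dateutil.relativedelta import relativedelta


def strip_ordinal(date_str: str) -> str:
    # "2nd January" -> "2 January"; only suffixes directly after a digit go
    return re.sub(r"(\d{1,2})(st|nd|rd|th)", r"\1", date_str)


def resolve_collection_date(raw: str, today: datetime) -> str:
    parsed = datetime.strptime(strip_ordinal(raw), "%A %d %B").replace(year=today.year)
    # A January collection seen in December belongs to next year, not this one
    if today.month == 12 and parsed.month == 1:
        parsed = parsed + relativedelta(years=1)
    return parsed.strftime("%d/%m/%Y")


print(resolve_collection_date("Friday 2nd January", datetime(2025, 12, 20)))    # 02/01/2026
print(resolve_collection_date("Monday 29th December", datetime(2025, 12, 20)))  # 29/12/2025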

uk_bin_collection/uk_bin_collection/councils/HinckleyandBosworthBoroughCouncil.py

Lines changed: 7 additions & 1 deletion
@@ -20,10 +20,16 @@ def parse_data(self, page: str, **kwargs) -> dict:
         check_uprn(user_uprn)
         bindata = {"bins": []}
 
+        headers = {
+            "Origin": "https://www.hinckley-bosworth.gov.uk",
+            "Referer": "https://www.hinckley-bosworth.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
         URI = f"https://www.hinckley-bosworth.gov.uk/set-location?id={user_uprn}&redirect=refuse&rememberloc="
 
         # Make the GET request
-        response = requests.get(URI)
+        response = requests.get(URI, headers=headers)
 
         # Parse the HTML
         soup = BeautifulSoup(response.content, "html.parser")

uk_bin_collection/uk_bin_collection/councils/IpswichBoroughCouncil.py

Lines changed: 3 additions & 1 deletion
@@ -31,7 +31,9 @@ class CouncilClass(AbstractGetBinDataClass):
     IBC_ENDPOINT = "https://app.ipswich.gov.uk/bin-collection/"
 
     def transform_date(self, date_str):
-        date_str = re.sub(r"(st|nd|rd|th)", "", date_str) # Remove ordinal suffixes
+        date_str = re.sub(
+            r"(\d{1,2})(st|nd|rd|th)", r"\1", date_str
+        ) # Remove ordinal suffixes
         date_obj = datetime.strptime(date_str, "%A %d %B %Y")
         return date_obj.strftime(date_format)
 

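The regex change in IpswichBoroughCouncil.py matters because the old pattern stripped every "st"/"nd"/"rd"/"th" substring, mangling weekday and month names ("Monday" loses its "nd", "August" loses its "st"), so the later strptime call with "%A %d %B %Y" raises ValueError; the new pattern only removes a suffix that directly follows the day number. A quick before-and-after check:

# Before/after check of the ordinal-suffix fix in IpswichBoroughCouncil.py:
# the old pattern also eats letters inside day and month names.
import re

raw = "Monday 1st August 2025"

old = re.sub(r"(st|nd|rd|th)", "", raw)
new = re.sub(r"(\d{1,2})(st|nd|rd|th)", r"\1", raw)

print(old)  # "Moay 1 Augu 2025"      -> strptime("%A %d %B %Y") raises ValueError
print(new)  # "Monday 1 August 2025"  -> parses cleanly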