
Commit 4acf729

fix: South Ribble and version pinning issues for input.json

Parent: d9fb6a6

File tree: 5 files changed, +115 −62 lines

custom_components/uk_bin_collection/config_flow.py
Lines changed: 3 additions & 6 deletions

@@ -12,11 +12,10 @@
 
 import collections  # At the top with other imports
 
-from .const import DOMAIN, LOG_PREFIX, SELENIUM_SERVER_URLS, BROWSER_BINARIES
+from .const import DOMAIN, LOG_PREFIX, SELENIUM_SERVER_URLS, BROWSER_BINARIES, INPUT_JSON_URL
 
 _LOGGER = logging.getLogger(__name__)
 
-
 class UkBinCollectionConfigFlow(config_entries.ConfigFlow, domain=DOMAIN):
     """Handle a config flow for UkBinCollection."""
 
@@ -253,10 +252,9 @@ async def async_step_reconfigure_confirm(
 
     async def get_councils_json(self) -> Dict[str, Any]:
         """Fetch and return the supported councils data, including aliases and sorted alphabetically."""
-        url = "https://raw.githubusercontent.com/robbrad/UKBinCollectionData/0.152.4/uk_bin_collection/tests/input.json"
         try:
             async with aiohttp.ClientSession() as session:
-                async with session.get(url) as response:
+                async with session.get(INPUT_JSON_URL) as response:
                     response.raise_for_status()
                     data_text = await response.text()
                     original_data = json.loads(data_text)
@@ -569,10 +567,9 @@ async def async_step_init(self, user_input=None):
 
     async def get_councils_json(self) -> Dict[str, Any]:
         """Fetch and return the supported councils data."""
-        url = "https://raw.githubusercontent.com/robbrad/UKBinCollectionData/0.111.0/uk_bin_collection/tests/input.json"
        try:
             async with aiohttp.ClientSession() as session:
-                async with session.get(url) as response:
+                async with session.get(INPUT_JSON_URL) as response:
                     response.raise_for_status()
                     data_text = await response.text()
                     return json.loads(data_text)
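
Note that the two copies of get_councils_json had drifted apart: one pinned tag 0.152.4 and the other 0.111.0, so the options flow was fetching a stale council list. Both now read the single INPUT_JSON_URL constant. A minimal standalone sketch of the shared fetch path (names and flow taken from the diff above; runnable on its own with aiohttp installed):

    import asyncio
    import json

    import aiohttp

    # Single source of truth, as defined in const.py by this commit.
    INPUT_JSON_URL = "https://raw.githubusercontent.com/robbrad/UKBinCollectionData/0.152.4/uk_bin_collection/tests/input.json"

    async def get_councils_json() -> dict:
        # Mirrors the fetch path both config-flow methods now share.
        async with aiohttp.ClientSession() as session:
            async with session.get(INPUT_JSON_URL) as response:
                response.raise_for_status()
                return json.loads(await response.text())

    if __name__ == "__main__":
        councils = asyncio.run(get_councils_json())
        print(f"{len(councils)} councils supported")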

custom_components/uk_bin_collection/const.py
Lines changed: 2 additions & 0 deletions

@@ -4,6 +4,8 @@
 
 from homeassistant.const import Platform
 
+INPUT_JSON_URL = "https://raw.githubusercontent.com/robbrad/UKBinCollectionData/0.152.4/uk_bin_collection/tests/input.json"
+
 DEFAULT_NAME = "UK Bin Collection Data"
 
 DOMAIN = "uk_bin_collection"

pyproject.toml
Lines changed: 1 addition & 1 deletion

@@ -71,6 +71,6 @@ version_scheme = "semver"
 version_files = [
     "custom_components/uk_bin_collection/manifest.json:version",
     "custom_components/uk_bin_collection/manifest.json:requirements",
-    "custom_components/uk_bin_collection/config_flow.py:githubusercontent"
+    "custom_components/uk_bin_collection/const.py:INPUT_JSON_URL"
 ]
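
Each version_files entry follows commitizen's path:pattern form: on a bump, the tool replaces occurrences of the current version with the new one, restricted to lines matching the pattern after the colon. That explains the "version pinning issues" in the commit title: the old entry matched "githubusercontent" lines in config_flow.py, but the copy stuck at 0.111.0 no longer contained the current version, so bumps silently skipped it. Pointing the entry at the single INPUT_JSON_URL line in const.py removes the duplication. A rough sketch of the substitution (simplified; commitizen itself treats the pattern as a regex and does more):

    from pathlib import Path

    def bump_version_file(path: str, pattern: str, old: str, new: str) -> None:
        # Simplified take on a commitizen "version_files" entry of the form
        # "path:pattern": rewrite old -> new only on lines containing the pattern.
        file = Path(path)
        lines = file.read_text().splitlines(keepends=True)
        file.write_text(
            "".join(
                line.replace(old, new) if pattern in line else line
                for line in lines
            )
        )

    # Hypothetical bump mirroring the new pyproject entry:
    # bump_version_file("custom_components/uk_bin_collection/const.py",
    #                   "INPUT_JSON_URL", "0.152.4", "0.153.0")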

uk_bin_collection/tests/input.json
Lines changed: 1 addition & 1 deletion

@@ -2181,7 +2181,7 @@
     },
     "SouthRibbleCouncil": {
         "uprn": "10013243496",
-        "postcode": "PR26 7RZ",
+        "postcode": "PR266QW",
         "url": "https://forms.chorleysouthribble.gov.uk/xfp/form/70",
         "wiki_command_url_override": "https://forms.chorleysouthribble.gov.uk/xfp/form/70",
         "wiki_name": "South Ribble",
Lines changed: 108 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,130 @@
-import requests
+from typing import Dict, List, Any, Optional
 from bs4 import BeautifulSoup
-
-from uk_bin_collection.uk_bin_collection.common import *
+from dateutil.relativedelta import relativedelta
+import requests
+import re
+from datetime import datetime
+from uk_bin_collection.uk_bin_collection.common import check_uprn, check_postcode, date_format
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+from dateutil.parser import parse
 
 
-# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
-    """
-    Concrete classes have to implement all abstract operations of the
-    base class. They can also override some operations with a default
-    implementation.
-    """
+    def get_data(self, url: str) -> str:
+        # This method is not used in the current implementation
+        return ""
 
-    def parse_data(self, page: str, **kwargs) -> dict:
+    def parse_data(self, page: str, **kwargs: Any) -> Dict[str, List[Dict[str, str]]]:
+        postcode: Optional[str] = kwargs.get("postcode")
+        uprn: Optional[str] = kwargs.get("uprn")
 
-        user_uprn = kwargs.get("uprn")
-        user_postcode = kwargs.get("postcode")
-        check_uprn(user_uprn)
-        check_postcode(user_postcode)
-        bindata = {"bins": []}
+        if postcode is None or uprn is None:
+            raise ValueError("Both postcode and UPRN are required.")
 
-        session_uri = "https://forms.chorleysouthribble.gov.uk/xfp/form/70"
-        URI = "https://forms.chorleysouthribble.gov.uk/xfp/form/70#qc576c657112a8277ba6f954ebc0490c946168363_0"
+        check_postcode(postcode)
+        check_uprn(uprn)
 
         session = requests.Session()
-        token_response = session.get(session_uri)
-        soup = BeautifulSoup(token_response.text, "html.parser")
-        token = soup.find("input", {"name": "__token"}).attrs["value"]
-
-        form_data = {
-            "__token": token,
-            "page": "196",
-            "locale": "en_GB",
-            "qc576c657112a8277ba6f954ebc0490c946168363_0_0": user_postcode,
-            "qc576c657112a8277ba6f954ebc0490c946168363_1_0": user_uprn,
-            "next": "Next",
+        headers = {
+            "User-Agent": (
+                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+                "(KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
+            )
         }
+        session.headers.update(headers)
+
+        # Step 1: Load form and get token + field names
+        initial_url = "https://forms.chorleysouthribble.gov.uk/xfp/form/70"
+        get_resp = session.get(initial_url)
+        soup = BeautifulSoup(get_resp.text, "html.parser")
+
+        token = soup.find("input", {"name": "__token"})["value"]
+        page_id = soup.find("input", {"name": "page"})["value"]
+        postcode_field = soup.find("input", {"type": "text", "name": re.compile(".*_0_0")})["name"]
+
+        # Step 2: Submit postcode
+        post_resp = session.post(
+            initial_url,
+            data={
+                "__token": token,
+                "page": page_id,
+                "locale": "en_GB",
+                postcode_field: postcode,
+                "next": "Next",
+            },
+        )
 
-        collection_response = session.post(URI, data=form_data)
+        soup = BeautifulSoup(post_resp.text, "html.parser")
+        token = soup.find("input", {"name": "__token"})["value"]
+        address_field_el = soup.find("select", {"name": re.compile(".*_1_0")})
+        if not address_field_el:
+            raise ValueError("Failed to find address dropdown after postcode submission.")
 
-        #collection_soup = BeautifulSoup(collection_response.text, "html.parser")
-
+        address_field = address_field_el["name"]
 
-        soup = BeautifulSoup(collection_response.text, "html.parser")
-        #print(soup)
+        # Step 3: Submit UPRN and retrieve bin data
+        final_resp = session.post(
+            initial_url,
+            data={
+                "__token": token,
+                "page": page_id,
+                "locale": "en_GB",
+                postcode_field: postcode,
+                address_field: uprn,
+                "next": "Next",
+            },
+        )
 
-        rows = soup.find("table").find_all("tr")
+        soup = BeautifulSoup(final_resp.text, "html.parser")
+        table = soup.find("table", class_="data-table")
+        if not table:
+            raise ValueError("Could not find bin collection table.")
 
-        # Form a JSON wrapper
+        rows = table.find("tbody").find_all("tr")
         data: Dict[str, List[Dict[str, str]]] = {"bins": []}
 
-        # Loops the Rows
+        # Extract bin type mapping from JavaScript
+        bin_type_map = {}
+        scripts = soup.find_all("script", type="text/javascript")
+        for script in scripts:
+            if script.string and "const bintype = {" in script.string:
+                match = re.search(r'const bintype = \{([^}]+)\}', script.string, re.DOTALL)
+                if match:
+                    bintype_content = match.group(1)
+                    for line in bintype_content.split('\n'):
+                        line = line.strip()
+                        if '"' in line and ':' in line:
+                            parts = line.split(':', 1)
+                            if len(parts) == 2:
+                                key = parts[0].strip().strip('"').strip("'")
+                                value = parts[1].strip().rstrip(',').strip().strip('"').strip("'")
+                                bin_type_map[key] = value
+                break
+
         for row in rows:
             cells = row.find_all("td")
-
-            if cells:
-                bin_type = cells[0].get_text(strip=True)
-                collection_next = cells[1].get_text(strip=True)
-
-                if len(collection_next) != 1:
-                    collection_date_obj = datetime.strptime(collection_next, "%d/%m/%y").date()
-                    # since we only have the next collection day, if the parsed date is in the past,
-                    # assume the day is instead next month
-                    if collection_date_obj < datetime.now().date():
-                        collection_date_obj += relativedelta(months=1)
-                    # Make each Bin element in the JSON
-                    dict_data = {
+            if len(cells) >= 2:
+                bin_type_cell = cells[0]
+                bin_type = bin_type_cell.get_text(strip=True)
+                bin_type = bin_type_map.get(bin_type, bin_type)
+
+                date_text = cells[1].get_text(strip=True)
+                date_parts = date_text.split(", ")
+                date_str = date_parts[1] if len(date_parts) == 2 else date_text
+
+                try:
+                    day, month, year = date_str.split('/')
+                    year = int(year)
+                    if year < 100:
+                        year = 2000 + year
+
+                    date_obj = datetime(year, int(month), int(day)).date()
+
+                    data["bins"].append({
                         "type": bin_type,
-                        "collectionDate": collection_date_obj.strftime("%d/%m/%Y"),
-                    }
-                    # Add data to the main JSON Wrapper
-                    data["bins"].append(dict_data)
-                    continue
+                        "collectionDate": date_obj.strftime(date_format)
+                    })
+                except Exception:
+                    continue
+
         return data
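
One non-obvious part of the rewrite: the human-readable bin names live in a 'const bintype = {...}' object inside the page's JavaScript rather than in the results table, so the parser scrapes that map and uses it to translate the table's labels. An isolated illustration of the extraction logic used above, run against a made-up script snippet (the HTML/JS content here is invented for demonstration):

    import re

    # Invented stand-in for the script content the council page embeds.
    script_text = '''
    const bintype = {
        "Grey Bin": "Household waste",
        "Blue Bin": "Paper and cardboard",
    };
    '''

    bin_type_map = {}
    match = re.search(r'const bintype = \{([^}]+)\}', script_text, re.DOTALL)
    if match:
        for line in match.group(1).split('\n'):
            line = line.strip()
            if '"' in line and ':' in line:
                key, value = line.split(':', 1)
                bin_type_map[key.strip().strip('"')] = value.strip().rstrip(',').strip('"')

    print(bin_type_map)  # {'Grey Bin': 'Household waste', 'Blue Bin': 'Paper and cardboard'}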
