-import requests
+from typing import Dict, List, Any, Optional
 
 from bs4 import BeautifulSoup
-
-from uk_bin_collection.uk_bin_collection.common import *
+import requests
+import re
+from datetime import datetime
+from uk_bin_collection.uk_bin_collection.common import check_uprn, check_postcode, date_format
 
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
-# import the wonderful Beautiful Soup and the URL grabber
 
 class CouncilClass(AbstractGetBinDataClass):
-    """
-    Concrete classes have to implement all abstract operations of the
-    base class. They can also override some operations with a default
-    implementation.
-    """
+    def get_data(self, url: str) -> str:
+        # This method is not used in the current implementation
+        return ""
 
-    def parse_data(self, page: str, **kwargs) -> dict:
+    def parse_data(self, page: str, **kwargs: Any) -> Dict[str, List[Dict[str, str]]]:
+        postcode: Optional[str] = kwargs.get("postcode")
+        uprn: Optional[str] = kwargs.get("uprn")
 
-        user_uprn = kwargs.get("uprn")
-        user_postcode = kwargs.get("postcode")
-        check_uprn(user_uprn)
-        check_postcode(user_postcode)
-        bindata = {"bins": []}
+        if postcode is None or uprn is None:
+            raise ValueError("Both postcode and UPRN are required.")
 
-        session_uri = "https://forms.chorleysouthribble.gov.uk/xfp/form/70"
-        URI = "https://forms.chorleysouthribble.gov.uk/xfp/form/70#qc576c657112a8277ba6f954ebc0490c946168363_0"
+        check_postcode(postcode)
+        check_uprn(uprn)
 
         session = requests.Session()
-        token_response = session.get(session_uri)
-        soup = BeautifulSoup(token_response.text, "html.parser")
-        token = soup.find("input", {"name": "__token"}).attrs["value"]
-
-        form_data = {
-            "__token": token,
-            "page": "196",
-            "locale": "en_GB",
-            "qc576c657112a8277ba6f954ebc0490c946168363_0_0": user_postcode,
-            "qc576c657112a8277ba6f954ebc0490c946168363_1_0": user_uprn,
-            "next": "Next",
+        headers = {
+            "User-Agent": (
+                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+                "(KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
+            )
         }
+        session.headers.update(headers)
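+        # Assumption: the form host may reject the default python-requests
+        # User-Agent, hence the browser UA above.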
35
+
36
+ # Step 1: Load form and get token + field names
37
+ initial_url = "https://forms.chorleysouthribble.gov.uk/xfp/form/70"
38
+ get_resp = session .get (initial_url )
39
+ soup = BeautifulSoup (get_resp .text , "html.parser" )
40
+
41
+ token = soup .find ("input" , {"name" : "__token" })["value" ]
42
+ page_id = soup .find ("input" , {"name" : "page" })["value" ]
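+        # Form field names carry a per-form hash prefix (the old code hard-coded
+        # "qc576c657112a8277ba6f954ebc0490c946168363_0_0"), so match on the
+        # "_0_0" / "_1_0" suffixes instead of the full names.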
+        postcode_field = soup.find("input", {"type": "text", "name": re.compile(".*_0_0")})["name"]
+
+        # Step 2: Submit postcode
+        post_resp = session.post(
+            initial_url,
+            data={
+                "__token": token,
+                "page": page_id,
+                "locale": "en_GB",
+                postcode_field: postcode,
+                "next": "Next",
+            },
+        )
 
-        collection_response = session.post(URI, data=form_data)
+        soup = BeautifulSoup(post_resp.text, "html.parser")
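+        # The form appears to issue a fresh __token with each page, so re-read
+        # it before the next POST.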
+        token = soup.find("input", {"name": "__token"})["value"]
+        address_field_el = soup.find("select", {"name": re.compile(".*_1_0")})
+        if not address_field_el:
+            raise ValueError("Failed to find address dropdown after postcode submission.")
 
-        #collection_soup = BeautifulSoup(collection_response.text, "html.parser")
-
+        address_field = address_field_el["name"]
 
-        soup = BeautifulSoup(collection_response.text, "html.parser")
-        #print(soup)
+        # Step 3: Submit UPRN and retrieve bin data
+        final_resp = session.post(
+            initial_url,
+            data={
+                "__token": token,
+                "page": page_id,
+                "locale": "en_GB",
+                postcode_field: postcode,
+                address_field: uprn,
+                "next": "Next",
+            },
+        )
 
-        rows = soup.find("table").find_all("tr")
+        soup = BeautifulSoup(final_resp.text, "html.parser")
+        table = soup.find("table", class_="data-table")
+        if not table:
+            raise ValueError("Could not find bin collection table.")
 
-        # Form a JSON wrapper
+        rows = table.find("tbody").find_all("tr")
 
         data: Dict[str, List[Dict[str, str]]] = {"bins": []}
 
-        # Loops the Rows
+        # Extract bin type mapping from JavaScript
+        bin_type_map = {}
+        scripts = soup.find_all("script", type="text/javascript")
+        for script in scripts:
+            if script.string and "const bintype = {" in script.string:
+                match = re.search(r"const bintype = \{([^}]+)\}", script.string, re.DOTALL)
+                if match:
+                    bintype_content = match.group(1)
+                    for line in bintype_content.split("\n"):
+                        line = line.strip()
+                        if '"' in line and ":" in line:
+                            parts = line.split(":", 1)
+                            if len(parts) == 2:
+                                key = parts[0].strip().strip('"').strip("'")
+                                value = parts[1].strip().rstrip(",").strip().strip('"').strip("'")
+                                bin_type_map[key] = value
+                break
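+        # Assumed script shape, for reference (keys and values illustrative):
+        #   const bintype = { "Grey bin": "Domestic Waste", ... };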
+
 
         for row in rows:
             cells = row.find_all("td")
-
-            if cells:
-                bin_type = cells[0].get_text(strip=True)
-                collection_next = cells[1].get_text(strip=True)
-
-                if len(collection_next) != 1:
-                    collection_date_obj = datetime.strptime(collection_next, "%d/%m/%y").date()
-                    # since we only have the next collection day, if the parsed date is in the past,
-                    # assume the day is instead next month
-                    if collection_date_obj < datetime.now().date():
-                        collection_date_obj += relativedelta(months=1)
-                    # Make each Bin element in the JSON
-                    dict_data = {
+            if len(cells) >= 2:
+                bin_type_cell = cells[0]
+                bin_type = bin_type_cell.get_text(strip=True)
+                bin_type = bin_type_map.get(bin_type, bin_type)
+
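+                # The date cell may carry a weekday prefix (e.g. "Fri, 01/08/25");
+                # keep only the date portion.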
+                date_text = cells[1].get_text(strip=True)
+                date_parts = date_text.split(", ")
+                date_str = date_parts[1] if len(date_parts) == 2 else date_text
+
+                try:
+                    day, month, year = date_str.split("/")
+                    year = int(year)
+                    if year < 100:
+                        year = 2000 + year
+
+                    date_obj = datetime(year, int(month), int(day)).date()
+
+                    data["bins"].append({
                         "type": bin_type,
-                        "collectionDate": collection_date_obj.strftime("%d/%m/%Y"),
-                    }
-                    # Add data to the main JSON Wrapper
-                    data["bins"].append(dict_data)
-                    continue
+                        "collectionDate": date_obj.strftime(date_format),
+                    })
+                except ValueError:
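+                    # Skip rows whose date cannot be parsed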
+                    continue
+
 
         return data
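
A minimal invocation sketch for the updated parser (the postcode and UPRN below are illustrative placeholders, not real values):

    council = CouncilClass()
    result = council.parse_data("", postcode="PR7 1DP", uprn="100010000000")
    for bin_entry in result["bins"]:
        print(bin_entry["type"], bin_entry["collectionDate"])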