
Commit 12d2390

Merge pull request #1553 from m26dvd/master
fix: Council Fix Pack - August 2025
2 parents a2541f1 + 2f0a889 commit 12d2390

10 files changed (+1227 -758 lines)
Lines changed: 82 additions & 24 deletions
@@ -1,11 +1,13 @@
-import json
+import time
+
 import requests
-from datetime import datetime
+from dateutil.relativedelta import relativedelta
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
+# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -14,28 +16,84 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
-        user_uprn = kwargs.get("uprn")
-        check_uprn(user_uprn)
+        # Make a BS4 object
+        uprn = kwargs.get("uprn")
+        # usrn = kwargs.get("paon")
+        check_uprn(uprn)
+        # check_usrn(usrn)
         bindata = {"bins": []}
-
-        # Make API request
-        api_url = f"https://east-herts.co.uk/api/services/{user_uprn}"
-        response = requests.get(api_url)
-        response.raise_for_status()
-
-        data = response.json()
-        today = datetime.now().date()
-
-        for service in data.get("services", []):
-            collection_date_str = service.get("collectionDate")
-            if collection_date_str:
-                collection_date = datetime.strptime(collection_date_str, "%Y-%m-%d").date()
-                # Only include future dates
-                if collection_date >= today:
-                    dict_data = {
-                        "type": service.get("binType", ""),
-                        "collectionDate": collection_date.strftime("%d/%m/%Y"),
+
+        # uprn = uprn.zfill(12)
+
+        SESSION_URL = "https://eastherts-self.achieveservice.com/authapi/isauthenticated?uri=https%253A%252F%252Feastherts-self.achieveservice.com%252FAchieveForms%252F%253Fmode%253Dfill%2526consentMessage%253Dyes%2526form_uri%253Dsandbox-publish%253A%252F%252FAF-Process-98782935-6101-4962-9a55-5923e76057b6%252FAF-Stage-dcd0ec18-dfb4-496a-a266-bd8fadaa28a7%252Fdefinition.json%2526process%253D1%2526process_uri%253Dsandbox-processes%253A%252F%252FAF-Process-98782935-6101-4962-9a55-5923e76057b6%2526process_id%253DAF-Process-98782935-6101-4962-9a55-5923e76057b6&hostname=eastherts-self.achieveservice.com&withCredentials=true"
+
+        API_URL = "https://eastherts-self.achieveservice.com/apibroker/runLookup"
+
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "*/*",
+            "User-Agent": "Mozilla/5.0",
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": "https://eastherts-self.achieveservice.com/fillform/?iframe_id=fillform-frame-1&db_id=",
+        }
+        s = requests.session()
+        r = s.get(SESSION_URL)
+        r.raise_for_status()
+        session_data = r.json()
+        sid = session_data["auth-session"]
+        params = {
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+
+        params = {
+            "id": "683d9ff0e299d",
+            "repeat_against": "",
+            "noRetry": "true",
+            "getOnlyTokens": "undefined",
+            "log_id": "",
+            "app_name": "AF-Renderer::Self",
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+
+        data = {
+            "formValues": {
+                "Collection Days": {
+                    "inputUPRN": {
+                        "value": uprn,
                     }
-                    bindata["bins"].append(dict_data)
-
+                },
+            }
+        }
+
+        r = s.post(API_URL, json=data, headers=headers, params=params)
+        r.raise_for_status()
+
+        data = r.json()
+        rows_data = data["integration"]["transformed"]["rows_data"]["0"]
+        if not isinstance(rows_data, dict):
+            raise ValueError("Invalid data returned from API")
+
+        # Extract each service's relevant details for the bin schedule
+        for key, value in rows_data.items():
+            if key.endswith("NextDate"):
+                BinType = key.replace("NextDate", "ServiceName")
+                for key2, value2 in rows_data.items():
+                    if key2 == BinType:
+                        BinType = value2
+                next_collection = datetime.strptime(
+                    remove_ordinal_indicator_from_date_string(value), "%A %d %B"
+                ).replace(year=datetime.now().year)
+                if datetime.now().month == 12 and next_collection.month == 1:
+                    next_collection = next_collection + relativedelta(years=1)
+
+                dict_data = {
+                    "type": BinType,
+                    "collectionDate": next_collection.strftime(date_format),
+                }
+                bindata["bins"].append(dict_data)
+
         return bindata
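The replacement flow pairs "<X>ServiceName"/"<X>NextDate" keys from the runLookup response and parses year-less dates like "Friday 2nd January". A minimal standalone sketch of that pairing and the December rollover, with hypothetical rows_data values and an inline stand-in for the project's remove_ordinal_indicator_from_date_string helper:

import re
from datetime import datetime

from dateutil.relativedelta import relativedelta


def strip_ordinals(s: str) -> str:
    # Inline stand-in for the project's remove_ordinal_indicator_from_date_string
    return re.sub(r"(\d)(st|nd|rd|th)", r"\1", s)


# Hypothetical rows_data shaped like the runLookup response: each service
# contributes a "<X>ServiceName" / "<X>NextDate" pair of keys
rows_data = {
    "RefuseServiceName": "Refuse",
    "RefuseNextDate": "Friday 19th December",
    "RecyclingServiceName": "Recycling",
    "RecyclingNextDate": "Friday 2nd January",
}

today = datetime(2025, 12, 15)  # pretend "now" is mid-December

for key, value in rows_data.items():
    if not key.endswith("NextDate"):
        continue
    service = rows_data.get(key.replace("NextDate", "ServiceName"), key)
    # The date comes back without a year, so assume the current one...
    next_collection = datetime.strptime(
        strip_ordinals(value), "%A %d %B"
    ).replace(year=today.year)
    # ...unless a January date shows up in December, which means next year
    if today.month == 12 and next_collection.month == 1:
        next_collection += relativedelta(years=1)
    print(service, next_collection.date())

# Refuse 2025-12-19
# Recycling 2026-01-02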

uk_bin_collection/uk_bin_collection/councils/HinckleyandBosworthBoroughCouncil.py

Lines changed: 7 additions & 1 deletion
@@ -20,10 +20,16 @@ def parse_data(self, page: str, **kwargs) -> dict:
         check_uprn(user_uprn)
         bindata = {"bins": []}
 
+        headers = {
+            "Origin": "https://www.hinckley-bosworth.gov.uk",
+            "Referer": "https://www.hinckley-bosworth.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
         URI = f"https://www.hinckley-bosworth.gov.uk/set-location?id={user_uprn}&redirect=refuse&rememberloc="
 
         # Make the GET request
-        response = requests.get(URI)
+        response = requests.get(URI, headers=headers)
 
         # Parse the HTML
         soup = BeautifulSoup(response.content, "html.parser")
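The same Origin/Referer/User-Agent trio recurs in the Lichfield, North East Lincolnshire, Nuneaton & Bedworth and Runnymede changes below, presumably to satisfy servers that reject header-less scripted requests. A minimal sketch of the pattern in isolation; the UPRN value is a hypothetical example:

import requests

headers = {
    "Origin": "https://www.hinckley-bosworth.gov.uk",
    "Referer": "https://www.hinckley-bosworth.gov.uk",
    "User-Agent": "Mozilla/5.0",
}
uprn = "100030533791"  # hypothetical UPRN, for illustration only
URI = f"https://www.hinckley-bosworth.gov.uk/set-location?id={uprn}&redirect=refuse&rememberloc="
response = requests.get(URI, headers=headers)
response.raise_for_status()
print(response.status_code)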

uk_bin_collection/uk_bin_collection/councils/IpswichBoroughCouncil.py

Lines changed: 3 additions & 1 deletion
@@ -31,7 +31,9 @@ class CouncilClass(AbstractGetBinDataClass):
     IBC_ENDPOINT = "https://app.ipswich.gov.uk/bin-collection/"
 
     def transform_date(self, date_str):
-        date_str = re.sub(r"(st|nd|rd|th)", "", date_str)  # Remove ordinal suffixes
+        date_str = re.sub(
+            r"(\d{1,2})(st|nd|rd|th)", r"\1", date_str
+        )  # Remove ordinal suffixes
         date_obj = datetime.strptime(date_str, "%A %d %B %Y")
         return date_obj.strftime(date_format)
 
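Anchoring the suffix to a digit matters because weekday and month names also contain "st"/"nd"/"rd"/"th"; the old pattern mangled them, so the strptime call that follows would fail. A quick comparison on a hypothetical input (LichfieldDistrictCouncil's solve() below already uses the digit-anchored form):

import re

date_str = "Monday 1st August 2025"
print(re.sub(r"(st|nd|rd|th)", "", date_str))              # "Moay 1 Augu 2025" - weekday and month mangled
print(re.sub(r"(\d{1,2})(st|nd|rd|th)", r"\1", date_str))  # "Monday 1 August 2025" - parseable with "%A %d %B %Y"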

uk_bin_collection/uk_bin_collection/councils/LichfieldDistrictCouncil.py

Lines changed: 7 additions & 1 deletion
@@ -24,10 +24,16 @@ def parse_data(self, page: str, **kwargs) -> dict:
         def solve(s):
             return re.sub(r"(\d)(st|nd|rd|th)", r"\1", s)
 
+        headers = {
+            "Origin": "https://www.lichfielddc.gov.uk",
+            "Referer": "https://www.lichfielddc.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
         URI = f"https://www.lichfielddc.gov.uk/homepage/6/bin-collection-dates?uprn={user_uprn}"
 
         # Make the GET request
-        response = requests.get(URI)
+        response = requests.get(URI, headers=headers)
 
         soup = BeautifulSoup(response.text, "html.parser")
 
uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py

Lines changed: 17 additions & 6 deletions
@@ -1,5 +1,7 @@
 import pandas as pd
+import requests
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import date_format
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
@@ -12,15 +14,26 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
    def parse_data(self, page: str, **kwargs) -> dict:
-        # Make a BS4 object
-        soup = BeautifulSoup(page.text, features="html.parser")
+        user_url = kwargs.get("url")
+
+        headers = {
+            "Origin": "https://www.nelincs.gov.uk",
+            "Referer": "https://www.nelincs.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
+        # Make the GET request
+        response = requests.get(user_url, headers=headers)
+
+        # Parse the HTML
+        soup = BeautifulSoup(response.content, "html.parser")
         soup.prettify()
 
         data = {"bins": []}
 
         # Get list items that can be seen on page
         for element in soup.find_all(
-            "li", {"class": "list-group-item p-0 p-3 bin-collection-item"}
+            "li", {"class": "border-0 list-group-item p-3 bg-light rounded p-2"}
         ):
             element_text = element.text.strip().split("\n\n")
             element_text = [x.strip() for x in element_text]
@@ -35,9 +48,7 @@ def parse_data(self, page: str, **kwargs) -> dict:
             data["bins"].append(dict_data)
 
         # Get hidden list items too
-        for element in soup.find_all(
-            "li", {"class": "list-group-item p-0 p-3 bin-collection-item d-none"}
-        ):
+        for element in soup.find_all("li", {"class": "border-0 list-group-item p-3"}):
             element_text = element.text.strip().split("\n\n")
             element_text = [x.strip() for x in element_text]
 
uk_bin_collection/uk_bin_collection/councils/NuneatonBedworthBoroughCouncil.py

Lines changed: 19 additions & 7 deletions
@@ -1,23 +1,29 @@
+import re
+import urllib.parse
+
+import requests
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
-from bs4 import BeautifulSoup
-import urllib.parse
-import requests
-import re
-
 
 class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
 
         data = {"bins": []}
 
+        headers = {
+            "Origin": "https://www.nuneatonandbedworth.gov.uk/",
+            "Referer": "https://www.nuneatonandbedworth.gov.uk/",
+            "User-Agent": "Mozilla/5.0",
+        }
+
         street = urllib.parse.quote_plus(kwargs.get("paon"))
         base_url = "https://www.nuneatonandbedworth.gov.uk/"
         search_query = f"directory/search?directoryID=3&showInMap=&keywords={street}&search=Search+directory"
 
-        search_response = requests.get(base_url + search_query)
+        search_response = requests.get(base_url + search_query, headers=headers)
 
         if search_response.status_code == 200:
             soup = BeautifulSoup(search_response.content, "html.parser")
@@ -56,7 +62,13 @@ def parse_data(self, page: str, **kwargs) -> dict:
 
     def get_bin_data(self, url) -> dict:
 
-        bin_day_response = requests.get(url)
+        headers = {
+            "Origin": "https://www.nuneatonandbedworth.gov.uk/",
+            "Referer": "https://www.nuneatonandbedworth.gov.uk/",
+            "User-Agent": "Mozilla/5.0",
+        }
+
+        bin_day_response = requests.get(url, headers=headers)
 
         if bin_day_response.status_code == 200:
 
uk_bin_collection/uk_bin_collection/councils/RunnymedeBoroughCouncil.py

Lines changed: 7 additions & 1 deletion
@@ -21,10 +21,16 @@ def parse_data(self, page: str, **kwargs) -> dict:
         check_uprn(user_uprn)
         bindata = {"bins": []}
 
+        headers = {
+            "Origin": "https://www.runnymede.gov.uk",
+            "Referer": "https://www.runnymede.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
         URI = f"https://www.runnymede.gov.uk/homepage/150/check-your-bin-collection-day?address={user_uprn}"
 
         # Make the GET request
-        response = requests.get(URI)
+        response = requests.get(URI, headers=headers)
 
         soup = BeautifulSoup(response.text, "html.parser")
 
uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py

Lines changed: 4 additions & 0 deletions
@@ -77,6 +77,10 @@ def parse_data(self, page: str, **kwargs) -> dict:
         )
         submit.click()
 
+        WebDriverWait(driver, 10).until(
+            EC.presence_of_element_located((By.CLASS_NAME, "bin-collection__month"))
+        )
+
         soup = BeautifulSoup(driver.page_source, features="html.parser")
 
         # Quit Selenium webdriver to release session
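The results here render after the form submit, so reading driver.page_source immediately can race the page's JavaScript. The added explicit wait blocks until the results container exists. A minimal sketch of the same pattern, with driver construction elided (any Selenium WebDriver instance works):

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def wait_for_results(driver, timeout: int = 10):
    # Block until at least one element with the target class is in the DOM,
    # raising TimeoutException if it never appears within `timeout` seconds
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CLASS_NAME, "bin-collection__month"))
    )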

uk_bin_collection/uk_bin_collection/councils/WiltshireCouncil.py

Lines changed: 47 additions & 29 deletions
@@ -1,3 +1,5 @@
+import re
+
 from bs4 import BeautifulSoup
 
 from uk_bin_collection.uk_bin_collection.common import *
@@ -91,40 +93,56 @@ def parse_data(self, page: str, **kwargs) -> dict:
 
         soup = BeautifulSoup(response.text, features="html.parser")
         soup.prettify()
-
+        # print(soup)
         # Find all the bits of the current calendar that contain an event
-        events = soup.find_all("div", {"class": "rc-event-container"})
+        resultscontainer = soup.find_all("div", {"class": "results-container"})
 
-        for event in events:
-            # Get the date and type of each bin collection
-            bin_date = datetime.strptime(
-                event.find_next("a").attrs.get("data-original-datetext"),
-                "%A %d %B, %Y",
+        for result in resultscontainer:
+            rows = result.find_all(
+                "div", {"class": "col-12 col-sm-6 col-md-4 col-lg-4 mb-4"}
             )
-            bin_type = event.find_next("a").attrs.get("data-original-title")
-            # Only process it if it's today or in the future
-            if bin_date.date() >= datetime.now().date():
-                # Split the really long type up into two separate bins
-                if (
-                    bin_type
-                    == "Mixed dry recycling (blue lidded bin) and glass (black box or basket)"
-                ):
-                    collections.append(
-                        (
-                            "Mixed dry recycling (blue lidded bin)",
-                            datetime.strftime(bin_date, date_format),
+            for row in rows:
+                cardcollectionday = row.find(
+                    "span", {"class": "card-collection-day"}
+                )
+                cardcollectiondate = row.find(
+                    "span", {"class": "card-collection-date"}
+                )
+                cardcollectionmonth = row.find(
+                    "span", {"class": "card-collection-month"}
+                )
+                bin_type = row.find(
+                    "li", {"class": re.compile(r"collection-type-...$")}
+                ).text
+
+                collection_date = f"{cardcollectionday.text}{cardcollectiondate.text}{cardcollectionmonth.text}"
+                bin_date = datetime.strptime(
+                    collection_date,
+                    "%A %d %B %Y",
+                )
+
+                if bin_date.date() >= datetime.now().date():
+                    # Split the really long type up into two separate bins
+                    if (
+                        bin_type
+                        == "Mixed dry recycling (blue lidded bin) and glass (black box or basket)"
+                    ):
+                        collections.append(
+                            (
+                                "Mixed dry recycling (blue lidded bin)",
+                                datetime.strftime(bin_date, date_format),
+                            )
+                        )
+                        collections.append(
+                            (
+                                "Glass (black box or basket)",
+                                datetime.strftime(bin_date, date_format),
+                            )
                         )
-                    )
-                    collections.append(
-                        (
-                            "Glass (black box or basket)",
-                            datetime.strftime(bin_date, date_format),
+                    else:
+                        collections.append(
+                            (bin_type, datetime.strftime(bin_date, date_format))
                         )
-                    )
-                else:
-                    collections.append(
-                        (bin_type, datetime.strftime(bin_date, date_format))
-                    )
 
         data = {"bins": []}
 
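The new Wiltshire markup splits each collection date across three spans, which the code above reassembles into one string before parsing. A sketch against a hypothetical HTML fragment shaped like that markup (the fragment and its class values are illustrative, not taken from the live site):

from datetime import datetime

from bs4 import BeautifulSoup

html = """
<div class="results-container">
  <div class="col-12 col-sm-6 col-md-4 col-lg-4 mb-4">
    <span class="card-collection-day">Friday </span>
    <span class="card-collection-date">22 </span>
    <span class="card-collection-month">August 2025</span>
    <ul><li class="collection-type-ref">Household waste</li></ul>
  </div>
</div>
"""
soup = BeautifulSoup(html, "html.parser")
# Passing the full space-separated string matches the exact class attribute,
# the same approach the council code uses above
for row in soup.find_all("div", {"class": "col-12 col-sm-6 col-md-4 col-lg-4 mb-4"}):
    date_text = "".join(
        row.find("span", {"class": name}).text
        for name in ("card-collection-day", "card-collection-date", "card-collection-month")
    )
    print(datetime.strptime(date_text, "%A %d %B %Y"))  # 2025-08-22 00:00:00

The bin type itself is pulled from the <li> whose class matches the regex collection-type-...$, e.g. "collection-type-ref" in the fragment above.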
