Commit a53ed3b

Merge branch 'aug_25_release2' into buckinghamshire_api_fetch
2 parents 88cd8f5 + 321f0d1

13 files changed (+1297, -825 lines)

uk_bin_collection/tests/input.json

Lines changed: 1 addition & 1 deletion
@@ -253,7 +253,7 @@
         "postcode": "BL1 5PQ",
         "skip_get_url": true,
         "uprn": "100010886936",
-        "url": "https://carehomes.bolton.gov.uk/bins.aspx",
+        "url": "https://web.bolton.gov.uk/bins.aspx",
         "web_driver": "http://selenium:4444",
         "wiki_name": "Bolton",
         "wiki_note": "To get the UPRN, you will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search). Previously required a single field that was UPRN and full address; now requires UPRN and postcode as separate fields.",

uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py

Lines changed: 1 addition & 1 deletion
@@ -35,7 +35,7 @@ def parse_data(self, page: str, **kwargs) -> dict:
         data = {"bins": []}

         # Get our initial session running
-        page = "https://carehomes.bolton.gov.uk/bins.aspx"
+        page = "https://web.bolton.gov.uk/bins.aspx"

         driver = create_webdriver(web_driver, headless, None, __name__)
         driver.get(page)
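Note: both the test fixture and BoltonCouncil.py now point at web.bolton.gov.uk rather than carehomes.bolton.gov.uk. A minimal, hypothetical smoke test for the relocated endpoint (not part of the repository; the parser itself still drives the page through Selenium, so a plain GET only confirms the host is reachable):

import requests

# Hypothetical check: confirm the relocated Bolton endpoint answers before
# running the Selenium-based parser.
NEW_URL = "https://web.bolton.gov.uk/bins.aspx"
resp = requests.get(NEW_URL, timeout=10)
print(resp.status_code, resp.url)
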
Lines changed: 82 additions & 24 deletions
@@ -1,11 +1,13 @@
-import json
+import time
+
 import requests
-from datetime import datetime
+from dateutil.relativedelta import relativedelta

 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


+# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -14,28 +16,84 @@ class CouncilClass(AbstractGetBinDataClass):
     """

     def parse_data(self, page: str, **kwargs) -> dict:
-        user_uprn = kwargs.get("uprn")
-        check_uprn(user_uprn)
+        # Make a BS4 object
+        uprn = kwargs.get("uprn")
+        # usrn = kwargs.get("paon")
+        check_uprn(uprn)
+        # check_usrn(usrn)
         bindata = {"bins": []}
-
-        # Make API request
-        api_url = f"https://east-herts.co.uk/api/services/{user_uprn}"
-        response = requests.get(api_url)
-        response.raise_for_status()
-
-        data = response.json()
-        today = datetime.now().date()
-
-        for service in data.get("services", []):
-            collection_date_str = service.get("collectionDate")
-            if collection_date_str:
-                collection_date = datetime.strptime(collection_date_str, "%Y-%m-%d").date()
-                # Only include future dates
-                if collection_date >= today:
-                    dict_data = {
-                        "type": service.get("binType", ""),
-                        "collectionDate": collection_date.strftime("%d/%m/%Y"),
+
+        # uprn = uprn.zfill(12)
+
+        SESSION_URL = "https://eastherts-self.achieveservice.com/authapi/isauthenticated?uri=https%253A%252F%252Feastherts-self.achieveservice.com%252FAchieveForms%252F%253Fmode%253Dfill%2526consentMessage%253Dyes%2526form_uri%253Dsandbox-publish%253A%252F%252FAF-Process-98782935-6101-4962-9a55-5923e76057b6%252FAF-Stage-dcd0ec18-dfb4-496a-a266-bd8fadaa28a7%252Fdefinition.json%2526process%253D1%2526process_uri%253Dsandbox-processes%253A%252F%252FAF-Process-98782935-6101-4962-9a55-5923e76057b6%2526process_id%253DAF-Process-98782935-6101-4962-9a55-5923e76057b6&hostname=eastherts-self.achieveservice.com&withCredentials=true"
+
+        API_URL = "https://eastherts-self.achieveservice.com/apibroker/runLookup"
+
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "*/*",
+            "User-Agent": "Mozilla/5.0",
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": "https://eastherts-self.achieveservice.com/fillform/?iframe_id=fillform-frame-1&db_id=",
+        }
+        s = requests.session()
+        r = s.get(SESSION_URL)
+        r.raise_for_status()
+        session_data = r.json()
+        sid = session_data["auth-session"]
+        params = {
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+
+        params = {
+            "id": "683d9ff0e299d",
+            "repeat_against": "",
+            "noRetry": "true",
+            "getOnlyTokens": "undefined",
+            "log_id": "",
+            "app_name": "AF-Renderer::Self",
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+
+        data = {
+            "formValues": {
+                "Collection Days": {
+                    "inputUPRN": {
+                        "value": uprn,
                     }
-                    bindata["bins"].append(dict_data)
-
+                },
+            }
+        }
+
+        r = s.post(API_URL, json=data, headers=headers, params=params)
+        r.raise_for_status()
+
+        data = r.json()
+        rows_data = data["integration"]["transformed"]["rows_data"]["0"]
+        if not isinstance(rows_data, dict):
+            raise ValueError("Invalid data returned from API")
+
+        # Extract each service's relevant details for the bin schedule
+        for key, value in rows_data.items():
+            if key.endswith("NextDate"):
+                BinType = key.replace("NextDate", "ServiceName")
+                for key2, value2 in rows_data.items():
+                    if key2 == BinType:
+                        BinType = value2
+                next_collection = datetime.strptime(
+                    remove_ordinal_indicator_from_date_string(value), "%A %d %B"
+                ).replace(year=datetime.now().year)
+                if datetime.now().month == 12 and next_collection.month == 1:
+                    next_collection = next_collection + relativedelta(years=1)
+
+                dict_data = {
+                    "type": BinType,
+                    "collectionDate": next_collection.strftime(date_format),
+                }
+                bindata["bins"].append(dict_data)
+
         return bindata
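The rewritten parser opens an achieveservice session, posts the UPRN to the runLookup broker, and then pairs every *NextDate field in rows_data with its *ServiceName counterpart, nudging the year forward when a January date is returned in December. A standalone sketch of that pairing logic using made-up keys and dates (only the NextDate/ServiceName suffix convention and the rollover rule come from the code above; strip_ordinal is a stand-in for remove_ordinal_indicator_from_date_string):

import re
from datetime import datetime

from dateutil.relativedelta import relativedelta

# Hypothetical payload: the real key names come from the council's lookup and
# are not shown in the diff; only the *NextDate / *ServiceName convention is.
rows_data = {
    "RefuseNextDate": "Friday 2nd January",
    "RefuseServiceName": "Refuse Collection Service",
    "RecyclingNextDate": "Monday 15th December",
    "RecyclingServiceName": "Recycling Collection Service",
}

def strip_ordinal(s):
    # stand-in for remove_ordinal_indicator_from_date_string()
    return re.sub(r"(\d{1,2})(st|nd|rd|th)", r"\1", s)

for key, value in rows_data.items():
    if key.endswith("NextDate"):
        service = rows_data.get(key.replace("NextDate", "ServiceName"), key)
        when = datetime.strptime(strip_ordinal(value), "%A %d %B").replace(
            year=datetime.now().year
        )
        # A January date seen in December belongs to next year.
        if datetime.now().month == 12 and when.month == 1:
            when += relativedelta(years=1)
        print(service, when.strftime("%d/%m/%Y"))
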

uk_bin_collection/uk_bin_collection/councils/HinckleyandBosworthBoroughCouncil.py

Lines changed: 7 additions & 1 deletion
@@ -20,10 +20,16 @@ def parse_data(self, page: str, **kwargs) -> dict:
         check_uprn(user_uprn)
         bindata = {"bins": []}

+        headers = {
+            "Origin": "https://www.hinckley-bosworth.gov.uk",
+            "Referer": "https://www.hinckley-bosworth.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
         URI = f"https://www.hinckley-bosworth.gov.uk/set-location?id={user_uprn}&redirect=refuse&rememberloc="

         # Make the GET request
-        response = requests.get(URI)
+        response = requests.get(URI, headers=headers)

         # Parse the HTML
         soup = BeautifulSoup(response.content, "html.parser")
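Hinckley & Bosworth, Lichfield and North East Lincolnshire all gain the same browser-like header trio in this commit, presumably so the councils' request filtering stops rejecting bare requests. A rough illustration of the shared pattern (the helper and the UPRN below are hypothetical; only the header names/values and the Hinckley URL are taken from the diff):

import requests

def council_get(url, site_root):
    # Hypothetical helper, not part of the repository: the Origin/Referer/
    # User-Agent trio added in this commit, applied to a single GET.
    headers = {
        "Origin": site_root,
        "Referer": site_root,
        "User-Agent": "Mozilla/5.0",
    }
    return requests.get(url, headers=headers, timeout=30)

uprn = "000000000000"  # illustrative placeholder UPRN
resp = council_get(
    f"https://www.hinckley-bosworth.gov.uk/set-location?id={uprn}&redirect=refuse&rememberloc=",
    "https://www.hinckley-bosworth.gov.uk",
)
print(resp.status_code)
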

uk_bin_collection/uk_bin_collection/councils/IpswichBoroughCouncil.py

Lines changed: 3 additions & 1 deletion
@@ -31,7 +31,9 @@ class CouncilClass(AbstractGetBinDataClass):
     IBC_ENDPOINT = "https://app.ipswich.gov.uk/bin-collection/"

     def transform_date(self, date_str):
-        date_str = re.sub(r"(st|nd|rd|th)", "", date_str)  # Remove ordinal suffixes
+        date_str = re.sub(
+            r"(\d{1,2})(st|nd|rd|th)", r"\1", date_str
+        )  # Remove ordinal suffixes
         date_obj = datetime.strptime(date_str, "%A %d %B %Y")
         return date_obj.strftime(date_format)

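The old pattern removed every "st"/"nd"/"rd"/"th" substring, which corrupts month and weekday names ("August" becomes "Augu", "Saturday" becomes "Satuay") and makes the subsequent strptime call fail; the new pattern only drops an ordinal that directly follows the day number. A quick before/after illustration:

import re

date_str = "Saturday 1st August 2025"

# Old behaviour: strips the suffix wherever it appears, mangling names.
print(re.sub(r"(st|nd|rd|th)", "", date_str))              # Satuay 1 Augu 2025
# New behaviour: only the ordinal attached to the day number is removed.
print(re.sub(r"(\d{1,2})(st|nd|rd|th)", r"\1", date_str))  # Saturday 1 August 2025
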
uk_bin_collection/uk_bin_collection/councils/LichfieldDistrictCouncil.py

Lines changed: 7 additions & 1 deletion
@@ -24,10 +24,16 @@ def parse_data(self, page: str, **kwargs) -> dict:
         def solve(s):
             return re.sub(r"(\d)(st|nd|rd|th)", r"\1", s)

+        headers = {
+            "Origin": "https://www.lichfielddc.gov.uk",
+            "Referer": "https://www.lichfielddc.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
         URI = f"https://www.lichfielddc.gov.uk/homepage/6/bin-collection-dates?uprn={user_uprn}"

         # Make the GET request
-        response = requests.get(URI)
+        response = requests.get(URI, headers=headers)

         soup = BeautifulSoup(response.text, "html.parser")

uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py

Lines changed: 17 additions & 6 deletions
@@ -1,5 +1,7 @@
 import pandas as pd
+import requests
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import date_format
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

@@ -12,15 +14,26 @@ class CouncilClass(AbstractGetBinDataClass):
     """

     def parse_data(self, page: str, **kwargs) -> dict:
-        # Make a BS4 object
-        soup = BeautifulSoup(page.text, features="html.parser")
+        user_url = kwargs.get("url")
+
+        headers = {
+            "Origin": "https://www.nelincs.gov.uk",
+            "Referer": "https://www.nelincs.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
+        # Make the GET request
+        response = requests.get(user_url, headers=headers)
+
+        # Parse the HTML
+        soup = BeautifulSoup(response.content, "html.parser")
         soup.prettify()

         data = {"bins": []}

         # Get list items that can be seen on page
         for element in soup.find_all(
-            "li", {"class": "list-group-item p-0 p-3 bin-collection-item"}
+            "li", {"class": "border-0 list-group-item p-3 bg-light rounded p-2"}
         ):
             element_text = element.text.strip().split("\n\n")
             element_text = [x.strip() for x in element_text]
@@ -35,9 +48,7 @@ def parse_data(self, page: str, **kwargs) -> dict:
             data["bins"].append(dict_data)

         # Get hidden list items too
-        for element in soup.find_all(
-            "li", {"class": "list-group-item p-0 p-3 bin-collection-item d-none"}
-        ):
+        for element in soup.find_all("li", {"class": "border-0 list-group-item p-3"}):
             element_text = element.text.strip().split("\n\n")
             element_text = [x.strip() for x in element_text]

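The North East Lincs parser now fetches the page itself and matches the site's new Bootstrap class strings. Worth noting: when the class filter contains spaces, BeautifulSoup matches it against the element's full class attribute, so the two loops select different sets of items. A small illustration with made-up markup (the real nelincs.gov.uk page structure is not shown in the diff, only the class strings):

from bs4 import BeautifulSoup

html = """
<ul>
  <li class="border-0 list-group-item p-3 bg-light rounded p-2">Visible item</li>
  <li class="border-0 list-group-item p-3">Hidden item</li>
</ul>
"""
soup = BeautifulSoup(html, "html.parser")

visible = soup.find_all(
    "li", {"class": "border-0 list-group-item p-3 bg-light rounded p-2"}
)
hidden = soup.find_all("li", {"class": "border-0 list-group-item p-3"})
print([li.text for li in visible])  # ['Visible item']
print([li.text for li in hidden])   # ['Hidden item']
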
Lines changed: 67 additions & 66 deletions
@@ -1,5 +1,3 @@
-import time
-
 import requests
 from bs4 import BeautifulSoup

@@ -17,76 +15,79 @@ class CouncilClass(AbstractGetBinDataClass):

     def parse_data(self, page: str, **kwargs) -> dict:

-        user_uprn = kwargs.get("uprn")
-        check_uprn(user_uprn)
+        user_postcode = kwargs.get("postcode")
+        user_paon = kwargs.get("paon")
+        check_postcode(user_postcode)
+        check_paon(user_paon)
         bindata = {"bins": []}

-        API_URL = "https://maps.norwich.gov.uk/arcgis/rest/services/MyNorwich/PropertyDetails/FeatureServer/2/query"
-
-        params = {
-            "f": "json",
-            "where": f"UPRN='{user_uprn}' or UPRN='0{user_uprn}'",
-            "returnGeometry": "true",
-            "spatialRel": "esriSpatialRelIntersects",
-            "geometryType": "esriGeometryPolygon",
-            "inSR": "4326",
-            "outFields": "*",
-            "outSR": "4326",
-            "resultRecordCount": "1000",
+        URI = "https://bnr-wrp.whitespacews.com/"
+
+        session = requests.Session()
+
+        # get link from first page as has some kind of unique hash
+        r = session.get(
+            URI,
+        )
+        r.raise_for_status()
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        alink = soup.find("a", text="View my collections")
+
+        if alink is None:
+            raise Exception("Initial page did not load correctly")
+
+        # greplace 'seq' query string to skip next step
+        nextpageurl = alink["href"].replace("seq=1", "seq=2")
+
+        data = {
+            "address_name_number": user_paon,
+            "address_postcode": user_postcode,
         }

-        r = requests.get(API_URL, params=params)
-
-        data = r.json()
-        data = data["features"][0]["attributes"]["WasteCollectionHtml"]
-        soup = BeautifulSoup(data, "html.parser")
-
-        alternateCheck = soup.find("p")
-        if alternateCheck.text.__contains__("alternate"):
-            alternateCheck = True
-        else:
-            alternateCheck = False
-
-        strong = soup.find_all("strong")
-        collections = []
-
-        if alternateCheck:
-            bin_types = strong[2].text.strip().replace(".", "").split(" and ")
-            for bin in bin_types:
-                collections.append(
-                    (
-                        bin.capitalize(),
-                        datetime.strptime(strong[1].text.strip(), date_format),
-                    )
-                )
-
-        else:
-            p_tag = soup.find_all("p")
-            i = 1
-            for p in p_tag:
-                bin_types = (
-                    p.text.split("Your ")[1].split(" is collected")[0].split(" and ")
-                )
-                for bin in bin_types:
-                    collections.append(
-                        (
-                            bin.capitalize(),
-                            datetime.strptime(strong[1].text.strip(), date_format),
-                        )
-                    )
-                i += 2
-
-        if len(strong) > 3:
-            collections.append(
-                ("Garden", datetime.strptime(strong[4].text.strip(), date_format))
-            )
-
-        ordered_data = sorted(collections, key=lambda x: x[1])
-        for item in ordered_data:
+        # get list of addresses
+        r = session.post(nextpageurl, data)
+        r.raise_for_status()
+
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        # get first address (if you don't enter enough argument values this won't find the right address)
+        alink = soup.find("div", id="property_list").find("a")
+
+        if alink is None:
+            raise Exception("Address not found")
+
+        nextpageurl = URI + alink["href"]
+
+        # get collection page
+        r = session.get(
+            nextpageurl,
+        )
+        r.raise_for_status()
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        if soup.find("span", id="waste-hint"):
+            raise Exception("No scheduled services at this address")
+
+        u1s = soup.find("section", id="scheduled-collections").find_all("u1")
+
+        for u1 in u1s:
+            lis = u1.find_all("li", recursive=False)
+
+            date = lis[1].text.replace("\n", "")
+            bin_type = lis[2].text.replace("\n", "")
+
             dict_data = {
-                "type": item[0] + " bin",
-                "collectionDate": item[1].strftime(date_format),
+                "type": bin_type,
+                "collectionDate": datetime.strptime(
+                    date,
+                    "%d/%m/%Y",
+                ).strftime(date_format),
             }
             bindata["bins"].append(dict_data)

+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+        )
+
         return bindata
