fix: East Herts (switch to UPRN-based JSON API) and East Renfrewshire (new bin-days page)

robbrad · robbrad · commit 80c8b1507200 · 2025-08-03T18:08:53.000+01:00
diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json
@@ -761,13 +761,11 @@
     },
     "EastHertsCouncil": {
         "LAD24CD": "E07000097",
-        "house_number": "1",
-        "postcode": "CM20 2FZ",
         "skip_get_url": true,
-        "url": "https://www.eastherts.gov.uk",
-        "web_driver": "http://selenium:4444",
+        "uprn": "10023088183",
+        "url": "https://east-herts.co.uk/api/services/",
         "wiki_name": "East Herts Council",
-        "wiki_note": "Pass the house number and postcode in their respective parameters."
+        "wiki_note": "Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search)."
     },
     "EastLindseyDistrictCouncil": {
         "house_number": "1",
diff --git a/uk_bin_collection/uk_bin_collection/councils/EastHertsCouncil.py b/uk_bin_collection/uk_bin_collection/councils/EastHertsCouncil.py
@@ -1,7 +1,6 @@
-from bs4 import BeautifulSoup
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.support.wait import WebDriverWait
+import json
+import requests
+from datetime import datetime
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
@@ -15,116 +14,28 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
-        # Get and check UPRN
-        driver = None
-        try:
-            user_postcode = kwargs.get("postcode")
-            user_paon = kwargs.get("paon")
-            check_paon(user_paon)
-            check_postcode(user_postcode)
-            web_driver = kwargs.get("web_driver")
-            headless = kwargs.get("headless")
-            bindata = {"bins": []}
-
-            API_URL = "https://uhte-wrp.whitespacews.com"
-
-            # Create Selenium webdriver
-            driver = create_webdriver(web_driver, headless, None, __name__)
-            driver.get(API_URL)
-
-            # Click Find my bin collection day button
-            collectionButton = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable((By.LINK_TEXT, "Find my bin collection day"))
-            )
-            collectionButton.click()
-
-            main_content = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located((By.ID, "main-content"))
-            )
-
-            # Wait for the property number field to appear then populate it
-            inputElement_number = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "address_name_number",
-                    )
-                )
-            )
-            inputElement_number.send_keys(user_paon)
-
-            # Wait for the postcode field to appear then populate it
-            inputElement_postcode = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "address_postcode",
-                    )
-                )
-            )
-            inputElement_postcode.send_keys(user_postcode)
-
-            # Click search button
-            continueButton = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.ID,
-                        "Submit",
-                    )
-                )
-            )
-            continueButton.click()
-
-            # Wait for the 'Search Results' to appear and select the first result
-            property = WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.CSS_SELECTOR,
-                        "li.app-subnav__section-item a",
-                        # "app-subnav__link govuk-link clicker colordarkblue fontfamilyArial fontsize12rem",
-                        # "//a[starts-with(@aria-label, '{user_paon}')]",
-                    )
-                )
-            )
-            property.click()
-
-            upcoming_scheduled_collections = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located(
-                    (By.ID, "upcoming-scheduled-collections")
-                )
-            )
-
-            soup = BeautifulSoup(driver.page_source, features="html.parser")
-
-            collections = []
-            for collection in soup.find_all(
-                "u1",
-                class_="displayinlineblock justifycontentleft alignitemscenter margin0 padding0",
-            ):
-                date = collection.find(
-                    "p", string=lambda text: text and "/" in text
-                ).text.strip()  # Extract date
-                service = collection.find(
-                    "p", string=lambda text: text and "Collection Service" in text
-                ).text.strip()  # Extract service type
-                collections.append({"date": date, "service": service})
-
-            # Print the parsed data
-            for item in collections:
-
-                dict_data = {
-                    "type": item["service"],
-                    "collectionDate": item["date"],
-                }
-                bindata["bins"].append(dict_data)
-
-        except Exception as e:
-            # Here you can log the exception if needed
-            print(f"An error occurred: {e}")
-            # Optionally, re-raise the exception if you want it to propagate
-            raise
-        finally:
-            # This block ensures that the driver is closed regardless of an exception
-            if driver:
-                driver.quit()
+        user_uprn = kwargs.get("uprn")
+        check_uprn(user_uprn)
+        bindata = {"bins": []}
+        
+        # Make API request
+        api_url = f"https://east-herts.co.uk/api/services/{user_uprn}"
+        response = requests.get(api_url)
+        response.raise_for_status()
+        
+        data = response.json()
+        today = datetime.now().date()
+        
+        for service in data.get("services", []):
+            collection_date_str = service.get("collectionDate")
+            if collection_date_str:
+                collection_date = datetime.strptime(collection_date_str, "%Y-%m-%d").date()
+                # Only include future dates
+                if collection_date >= today:
+                    dict_data = {
+                        "type": service.get("binType", ""),
+                        "collectionDate": collection_date.strftime("%d/%m/%Y"),
+                    }
+                    bindata["bins"].append(dict_data)
+        
         return bindata
diff --git a/uk_bin_collection/uk_bin_collection/councils/EastRenfrewshireCouncil.py b/uk_bin_collection/uk_bin_collection/councils/EastRenfrewshireCouncil.py
@@ -2,12 +2,12 @@
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.wait import WebDriverWait
+from selenium.webdriver.support.ui import Select
 
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
-# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -21,97 +21,102 @@ def parse_data(self, page: str, **kwargs) -> dict:
             data = {"bins": []}
             user_paon = kwargs.get("paon")
             user_postcode = kwargs.get("postcode")
+            user_uprn = kwargs.get("uprn")
             web_driver = kwargs.get("web_driver")
             headless = kwargs.get("headless")
-            check_paon(user_paon)
             check_postcode(user_postcode)
 
             # Create Selenium webdriver
             driver = create_webdriver(web_driver, headless, None, __name__)
-            driver.get(
-                "https://eastrenfrewshire.gov.uk/article/1145/Bin-collection-days"
-            )
+            driver.get("https://eastrenfrewshire.gov.uk/bin-days")
 
             # Wait for the postcode field to appear then populate it
             inputElement_postcode = WebDriverWait(driver, 30).until(
                 EC.presence_of_element_located(
-                    (By.ID, "RESIDUALWASTEV2_PAGE1_POSTCODE")
+                    (By.CSS_SELECTOR, "input[autocomplete='postal-code']")
                 )
             )
             inputElement_postcode.send_keys(user_postcode)
 
             # Click search button
-            findAddress = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located(
-                    (By.ID, "RESIDUALWASTEV2_PAGE1_FIELD199_NEXT")
-                )
-            )
-            findAddress.click()
-
-            # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
-            WebDriverWait(driver, 10).until(
+            search_button = WebDriverWait(driver, 10).until(
                 EC.element_to_be_clickable(
-                    (
-                        By.XPATH,
-                        "//select[@id='RESIDUALWASTEV2_PAGE2_UPRN']//option[contains(., '"
-                        + user_paon
-                        + "')]",
-                    )
+                    (By.XPATH, "//button[text()='Search']")
                 )
-            ).click()
+            )
+            search_button.click()
 
-            # Click search button
-            findDates = WebDriverWait(driver, 10).until(
+            # Wait for the addresses dropdown to appear
+            addresses_select = WebDriverWait(driver, 10).until(
                 EC.presence_of_element_located(
-                    (By.ID, "RESIDUALWASTEV2_PAGE2_FIELD206_NEXT")
+                    (By.XPATH, "//label[text()='Addresses']/following-sibling::select")
                 )
             )
-            findDates.click()
+            
+            # Select the appropriate address based on UPRN or house number
+            select = Select(addresses_select)
+            if user_uprn:
+                # Select by UPRN value
+                select.select_by_value(user_uprn)
+            elif user_paon:
+                # Select by house number/name in the text
+                for option in select.options:
+                    if user_paon in option.text:
+                        select.select_by_visible_text(option.text)
+                        break
+            else:
+                # Select the first non-default option
+                select.select_by_index(1)
+
+            # Click the "Find my collection dates" button
+            find_dates_button = WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable(
+                    (By.XPATH, "//button[text()='Find my collection dates']")
+                )
+            )
+            find_dates_button.click()
 
-            # Wait for the collections table to appear
+            # Wait for the results table to appear
             WebDriverWait(driver, 10).until(
                 EC.presence_of_element_located(
-                    (By.ID, "RESIDUALWASTEV2_COLLECTIONDATES_DISPLAYBINCOLLECTIONINFO")
+                    (By.XPATH, "//th[text()='Bin Type']")
                 )
             )
 
             soup = BeautifulSoup(driver.page_source, features="html.parser")
-            soup.prettify()
-
-            # Get collections div
-            next_collection_div = soup.find("div", {"id": "yourNextCollection"})
-
-            # Get next collection date
-            next_collection_date = datetime.strptime(
-                next_collection_div.find("span", {"class": "dueDate"})
-                .get_text()
-                .strip(),
-                "%d/%m/%Y",
-            )
-
-            # Get next collection bins
-            next_collection_bin = next_collection_div.findAll(
-                "span", {"class": "binColour"}
-            )
-
-            # Format results
-            for row in next_collection_bin:
-                dict_data = {
-                    "type": row.get_text().strip(),
-                    "collectionDate": next_collection_date.strftime("%d/%m/%Y"),
-                }
-                data["bins"].append(dict_data)
+            
+            # Find the table with bin collection data
+            table = soup.find("th", string="Bin Type").find_parent("table")
+            rows = table.find_all("tr")[1:]  # Skip header row
+            
+            for row in rows:
+                cells = row.find_all("td")
+                if len(cells) >= 3:
+                    date_cell = cells[0].get_text().strip()
+                    bin_type_cell = cells[2]
+                    
+                    # Only process rows that have a date
+                    if date_cell:
+                        # Get all text content including line breaks
+                        bin_type_text = bin_type_cell.get_text(separator='\n').strip()
+                        
+                        # Split multiple bin types that appear on separate lines
+                        bin_types = [bt.strip() for bt in bin_type_text.split('\n') if bt.strip()]
+                        
+                        for bin_type in bin_types:
+                            dict_data = {
+                                "type": bin_type,
+                                "collectionDate": date_cell,
+                            }
+                            data["bins"].append(dict_data)
 
             data["bins"].sort(
                 key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
             )
         except Exception as e:
-            # Here you can log the exception if needed
             print(f"An error occurred: {e}")
-            # Optionally, re-raise the exception if you want it to propagate
             raise
         finally:
-            # This block ensures that the driver is closed regardless of an exception
             if driver:
                 driver.quit()
         return data