fix: #1570 - Slough Borough Council

m26dvd · m26dvd · commit d5516f6a1755 · 2025-09-20T23:16:58.000+01:00
diff --git a/uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py
@@ -1,15 +1,18 @@
-import time
 import re
-import requests
+import time
 from datetime import datetime
+
+import requests
 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import WebDriverWait
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
+
 def get_street_from_postcode(postcode: str, api_key: str) -> str:
     url = "https://maps.googleapis.com/maps/api/geocode/json"
     params = {"address": postcode, "key": api_key}
@@ -25,6 +28,7 @@ def get_street_from_postcode(postcode: str, api_key: str) -> str:
 
     raise ValueError("No street (route) found in the response.")
 
+
 class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
@@ -37,10 +41,10 @@ def parse_data(self, page: str, **kwargs) -> dict:
 
             headless = kwargs.get("headless")
             web_driver = kwargs.get("web_driver")
-            driver = create_webdriver(web_driver, headless, None, __name__)
+            UserAgent = "Mozilla/5.0"
+            driver = create_webdriver(web_driver, headless, UserAgent, __name__)
             page = "https://www.slough.gov.uk/bin-collections"
             driver.get(page)
-
             # Accept cookies
             WebDriverWait(driver, 10).until(
                 EC.element_to_be_clickable((By.ID, "ccc-recommended-settings"))
@@ -50,14 +54,20 @@ def parse_data(self, page: str, **kwargs) -> dict:
             address_input = WebDriverWait(driver, 10).until(
                 EC.presence_of_element_located((By.ID, "keyword_directory25"))
             )
-            user_address = get_street_from_postcode(user_postcode, "AIzaSyBDLULT7EIlNtHerswPtfmL15Tt3Oc0bV8")
+            user_address = get_street_from_postcode(
+                user_postcode, "AIzaSyBDLULT7EIlNtHerswPtfmL15Tt3Oc0bV8"
+            )
             address_input.send_keys(user_address + Keys.ENTER)
 
             # Wait for address results to load
             WebDriverWait(driver, 10).until(
-                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "span.list__link-text"))
+                EC.presence_of_all_elements_located(
+                    (By.CSS_SELECTOR, "span.list__link-text")
+                )
+            )
+            span_elements = driver.find_elements(
+                By.CSS_SELECTOR, "span.list__link-text"
             )
-            span_elements = driver.find_elements(By.CSS_SELECTOR, "span.list__link-text")
 
             for span in span_elements:
                 if user_address.lower() in span.text.lower():
@@ -68,7 +78,9 @@ def parse_data(self, page: str, **kwargs) -> dict:
 
             # Wait for address detail page
             WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located((By.CSS_SELECTOR, "section.site-content"))
+                EC.presence_of_element_located(
+                    (By.CSS_SELECTOR, "section.site-content")
+                )
             )
             soup = BeautifulSoup(driver.page_source, "html.parser")
 
@@ -86,28 +98,33 @@ def parse_data(self, page: str, **kwargs) -> dict:
                             bin_url = "https://www.slough.gov.uk" + bin_url
 
                         # Visit the child page
-                        print(f"Navigating to {bin_url}")
+                        # print(f"Navigating to {bin_url}")
                         driver.get(bin_url)
                         WebDriverWait(driver, 10).until(
-                            EC.presence_of_element_located((By.CSS_SELECTOR, "div.page-content"))
+                            EC.presence_of_element_located(
+                                (By.CSS_SELECTOR, "div.page-content")
+                            )
                         )
                         child_soup = BeautifulSoup(driver.page_source, "html.parser")
 
                         editor_div = child_soup.find("div", class_="editor")
                         if not editor_div:
-                            print("No editor div found on bin detail page.")
+                            # print("No editor div found on bin detail page.")
                             continue
 
                         ul = editor_div.find("ul")
                         if not ul:
-                            print("No <ul> with dates found in editor div.")
+                            # print("No <ul> with dates found in editor div.")
                             continue
 
                     for li in ul.find_all("li"):
                         raw_text = li.get_text(strip=True).replace(".", "")
 
-                        if "no collection" in raw_text.lower() or "no collections" in raw_text.lower():
-                            print(f"Ignoring non-collection note: {raw_text}")
+                        if (
+                            "no collection" in raw_text.lower()
+                            or "no collections" in raw_text.lower()
+                        ):
+                            # print(f"Ignoring non-collection note: {raw_text}")
                             continue
 
                         raw_date = raw_text
@@ -117,24 +134,25 @@ def parse_data(self, page: str, **kwargs) -> dict:
                         except ValueError:
                             raw_date_cleaned = raw_date.split("(")[0].strip()
                             try:
-                                parsed_date = datetime.strptime(raw_date_cleaned, "%d %B %Y")
+                                parsed_date = datetime.strptime(
+                                    raw_date_cleaned, "%d %B %Y"
+                                )
                             except Exception:
                                 print(f"Could not parse date: {raw_text}")
                                 continue
 
                         formatted_date = parsed_date.strftime("%d/%m/%Y")
                         contains_date(formatted_date)
-                        bin_data["bins"].append({
-                            "type": bin_type,
-                            "collectionDate": formatted_date
-                        })
+                        bin_data["bins"].append(
+                            {"type": bin_type, "collectionDate": formatted_date}
+                        )
 
-                        print(f"Type: {bin_type}, Date: {formatted_date}") 
+                        # print(f"Type: {bin_type}, Date: {formatted_date}")
 
         except Exception as e:
             print(f"An error occurred: {e}")
             raise
         finally:
             if driver:
                 driver.quit()
-        return bin_data
+        return bin_data