Merge pull request #1453 from davida72/Council-Fixes-(05-21)

robbrad · web-flow · commit 7eaaa0b46e99 · 2025-06-04T13:26:06.000+01:00
Council fixes (05 21)
diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json
@@ -1693,9 +1693,10 @@
         "LAD24CD": "E06000012"
     },
     "NorthHertfordshireDistrictCouncil": {
-        "house_number": "2",
+        "house_number": "22",
         "postcode": "SG6 4BJ",
         "url": "https://www.north-herts.gov.uk",
+        "web_driver": "http://selenium:4444",
         "wiki_name": "North Hertfordshire",
         "wiki_note": "Pass the house number and postcode in their respective parameters.",
         "LAD24CD": "E07000099"
@@ -2180,6 +2181,7 @@
     },
     "SouthRibbleCouncil": {
         "uprn": "010013246384",
+        "postcode": "PR5 6DT",
         "url": "https://www.southribble.gov.uk",
         "wiki_command_url_override": "https://www.southribble.gov.uk",
         "wiki_name": "South Ribble",
diff --git a/uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py b/uk_bin_collection/uk_bin_collection/councils/AdurAndWorthingCouncils.py
@@ -37,38 +37,55 @@ def parse_data(self, page: str, **kwargs) -> dict:
                 paragraphs = bin_row.find_all("p")
 
                 for p in paragraphs:
-                    if p.get_text() and "Next collection:" in p.get_text():
-                        date_str = p.get_text().replace("Next collection:", "").strip()
-                        # Extract day number from date string (e.g. "2" from "Friday 2nd May")
-                        day_number = int("".join(filter(str.isdigit, date_str)))
-                        # Replace ordinal in date string with plain number
-                        date_str = date_str.replace(
-                            get_date_with_ordinal(day_number), str(day_number)
+                    # Check for both singular and plural "Next collection(s):"
+                    if p.get_text() and (
+                        "Next collection:" in p.get_text()
+                        or "Next collections:" in p.get_text()
+                    ):
+                        # Extract collection dates
+                        date_text = (
+                            p.get_text()
+                            .replace("Next collection:", "")
+                            .replace("Next collections:", "")
+                            .strip()
                         )
 
-                        try:
-                            # Parse date with full format
-                            bin_date = datetime.strptime(date_str, "%A %d %B")
-
-                            # Add current year since it's not in the date string
-                            current_year = datetime.now().year
-                            bin_date = bin_date.replace(year=current_year)
-
-                            # If the date is in the past, it's probably for next year
-                            if bin_date < datetime.now():
-                                bin_date = bin_date.replace(year=current_year + 1)
-
-                            collections.append((bin_type, bin_date))
-                            print(
-                                f"Successfully parsed date for {bin_type}: {bin_date}"
-                            )
-                            break
-
-                        except ValueError as e:
-                            print(
-                                f"Failed to parse date '{date_str}' for {bin_type}: {e}"
-                            )
-                            continue
+                        # Split multiple dates if comma-separated
+                        date_strings = [date.strip() for date in date_text.split(",")]
+
+                        for date_str in date_strings:
+                            try:
+                                # Extract day number from date string (e.g. "27" from "Tuesday 27th May")
+                                day_number = int("".join(filter(str.isdigit, date_str)))
+                                # Replace ordinal in date string with plain number
+                                date_str = date_str.replace(
+                                    get_date_with_ordinal(day_number), str(day_number)
+                                )
+
+                                # Parse date with full format
+                                bin_date = datetime.strptime(date_str, "%A %d %B")
+
+                                # Add current year since it's not in the date string
+                                current_year = datetime.now().year
+                                bin_date = bin_date.replace(year=current_year)
+
+                                # If the date is in the past, it's probably for next year
+                                if bin_date < datetime.now():
+                                    bin_date = bin_date.replace(year=current_year + 1)
+
+                                collections.append((bin_type, bin_date))
+                                print(
+                                    f"Successfully parsed date for {bin_type}: {bin_date}"
+                                )
+
+                            except ValueError as e:
+                                print(
+                                    f"Failed to parse date '{date_str}' for {bin_type}: {e}"
+                                )
+                                continue
+
+                        # Found and processed the collection dates, so break the loop
+                        break
 
             except Exception as e:
                 print(f"Error processing bin row: {e}")
diff --git a/uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py b/uk_bin_collection/uk_bin_collection/councils/BradfordMDC.py
@@ -130,4 +130,22 @@ def parse_data(self, page: str, **kwargs) -> dict:
             key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
         )
 
+        data["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+        )
+
+        # Deduplicate the bins based on type and collection date
+        # Feels a bit hacky, but fixes
+        # https://github.com/robbrad/UKBinCollectionData/issues/1436
+        unique_bins = []
+        seen = set()
+        for bin_item in data["bins"]:
+            # Create a unique identifier for each bin entry
+            bin_key = (bin_item["type"], bin_item["collectionDate"])
+            if bin_key not in seen:
+                seen.add(bin_key)
+                unique_bins.append(bin_item)
+
+        data["bins"] = unique_bins
+
         return data
diff --git a/uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/EastleighBoroughCouncil.py
@@ -77,8 +77,13 @@ def parse_data(self, page: str, **kwargs) -> dict:
             return data
 
         except Exception as e:
-            print(f"Error fetching/parsing data: {str(e)}")
-            return {"bins": [{"type": "Error", "collectionDate": "2024-01-01"}]}
+            import traceback
+
+            error_message = f"Error fetching/parsing data for Eastleigh: {str(e)}\n{traceback.format_exc()}"
+            print(error_message)
+            # Use the correct date format for the error fallback
+            today = datetime.now().strftime("%d/%m/%Y")
+            return {"bins": [{"type": "Error", "collectionDate": today}]}
         finally:
             if "driver" in locals():
                 driver.quit()
diff --git a/uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/NorthHertfordshireDistrictCouncil.py