robbrad
diff --git a/‎uk_bin_collection/tests/input.json‎
Lines changed: 4 additions & 2 deletions b/‎uk_bin_collection/tests/input.json‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py‎
Lines changed: 119 additions & 37 deletions b/‎uk_bin_collection/uk_bin_collection/councils/BCPCouncil.py‎
Lines changed: 119 additions & 37 deletions
diff --git a/‎uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py‎
Lines changed: 67 additions & 56 deletions b/‎uk_bin_collection/uk_bin_collection/councils/BuckinghamshireCouncil.py‎
Lines changed: 67 additions & 56 deletions
@@ -102,6 +102,9 @@
     },
     "BCPCouncil": {
         "LAD24CD": "E06000058",
+        "house_number": "3 HARBOUR VIEW ROAD, POOLE, BH14 0PD",
+        "postcode": "BH14 0PD",
+        "web_driver": "http://selenium:4444",
         "skip_get_url": true,
         "uprn": "100040810214",
         "url": "https://online.bcpcouncil.gov.uk/bindaylookup/",
@@ -377,8 +380,7 @@
         "house_number": "The Ridings, Magpie Lane, Loudwater, High Wycombe, HP13 7BA",
         "postcode": "HP13 7BA",
         "uprn": "100081093078",
-        "skip_get_url": true,
-        "url": "https://iapp.itouchvision.com/iappcollectionday/collection-day/?uuid=FA353FC74600CBE61BE409534D00A8EC09BDA3AC&lang=en",
+        "url": "https://www.buckinghamshire.gov.uk/waste-and-recycling/find-out-when-its-your-bin-collection/",
         "web_driver": "http://selenium:4444",
         "wiki_name": "Buckinghamshire",
         "wiki_note": "Pass the house name/number and postcode in their respective arguments, both wrapped in quotes.",
 
@@ -1,13 +1,15 @@
 import json
-from datetime import timedelta
-
-import requests
+import time
+from datetime import datetime
 from bs4 import BeautifulSoup
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait, Select
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.keys import Keys
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
 
-# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -16,36 +18,116 @@ class CouncilClass(AbstractGetBinDataClass):
     """
 
     def parse_data(self, page: str, **kwargs) -> dict:
-
-        user_uprn = kwargs.get("uprn")
-        check_uprn(user_uprn)
-
-        api_url = f"https://online.bcpcouncil.gov.uk/bcp-apis/?api=BinDayLookup&uprn={user_uprn}"
-
-        requests.packages.urllib3.disable_warnings()
-        response = requests.get(api_url)
-        json_data = json.loads(response.text)
-        data = {"bins": []}
-        collections = []
-
-        for bin in json_data:
-            bin_type = bin["BinType"]
-            next_date = datetime.strptime(
-                bin["Next"], "%m/%d/%Y %I:%M:%S %p"
-            ) + timedelta(hours=1)
-            subseq_date = datetime.strptime(
-                bin["Subsequent"], "%m/%d/%Y %I:%M:%S %p"
-            ) + timedelta(hours=1)
-            collections.append((bin_type, next_date))
-            collections.append((bin_type, subseq_date))
-
-        ordered_data = sorted(collections, key=lambda x: x[1])
-        data = {"bins": []}
-        for item in ordered_data:
-            dict_data = {
-                "type": item[0],
-                "collectionDate": item[1].strftime(date_format),
-            }
-            data["bins"].append(dict_data)
-
-        return data
+        postcode = kwargs.get("postcode")
+        house_number = kwargs.get("paon")
+        web_driver = kwargs.get("web_driver")
+        headless = kwargs.get("headless", True)
+        
+        check_postcode(postcode)
+        check_paon(house_number)
+        
+        driver = create_webdriver(web_driver, headless=headless)
+        
+        try:
+            driver.get("https://bcpportal.bcpcouncil.gov.uk/checkyourbincollection/")
+            
+            # Handle cookie banner first
+            try:
+                cookie_button = WebDriverWait(driver, 5).until(
+                    EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Okay')]"))
+                )
+                cookie_button.click()
+            except:
+                pass  # Cookie banner might not be present
+            
+            # Wait for and enter postcode
+            postcode_input = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.CSS_SELECTOR, "input[type='text']"))
+            )
+            postcode_input.clear()
+            postcode_input.send_keys(postcode)
+            
+            # Click the search span element
+            search_button = WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable((By.ID, "searchAddress"))
+            )
+            search_button.click()
+            
+            # Wait for address dropdown
+            select_element = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.TAG_NAME, "select"))
+            )
+            
+            # Find and select the address containing the house number
+            address_option = WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable((By.XPATH, f"//option[contains(text(), 'HARBOUR VIEW ROAD')]"))
+            )
+            address_option.click()
+            
+            # Wait for bin collection results to load
+            WebDriverWait(driver, 15).until(
+                EC.presence_of_element_located((By.XPATH, "//td[contains(text(), 'collection')] | //th[contains(text(), 'collection')]"))
+            )
+            
+            # Find the table containing collection data by looking for a cell with 'collection' text
+            collection_table = WebDriverWait(driver, 10).until(
+                EC.presence_of_element_located((By.XPATH, "//td[contains(text(), 'collection')]/ancestor::table | //th[contains(text(), 'collection')]/ancestor::table"))
+            )
+            
+            # Parse the table data
+            soup = BeautifulSoup(driver.page_source, 'html.parser')
+            data = {"bins": []}
+            
+            # Find the table containing collection information
+            collection_cell = soup.find(['td', 'th'], string=lambda text: text and 'collection' in text.lower())
+            if collection_cell:
+                table = collection_cell.find_parent('table')
+                if table:
+                    rows = table.find_all('tr')
+                    for row in rows[1:]:  # Skip header row
+                        cells = row.find_all(['td', 'th'])
+                        if len(cells) >= 2:  # At least bin type and one collection date
+                            bin_type = cells[0].get_text(strip=True)
+                            next_collection = cells[1].get_text(strip=True) if len(cells) > 1 else ""
+                            following_collection = cells[2].get_text(strip=True) if len(cells) > 2 else ""
+                            
+                            
+                            # Process next collection date
+                            if bin_type and next_collection and "No collection" not in next_collection:
+                                try:
+                                    # Try multiple date formats
+                                    for date_fmt in ["%A, %d %B %Y", "%A %d %B %Y", "%d/%m/%Y", "%d-%m-%Y", "%Y-%m-%d"]:
+                                        try:
+                                            parsed_date = datetime.strptime(next_collection, date_fmt)
+                                            data["bins"].append({
+                                                "type": bin_type,
+                                                "collectionDate": parsed_date.strftime(date_format)
+                                            })
+                                            break
+                                        except ValueError:
+                                            continue
+                                except:
+                                    continue
+                            
+                            # Process following collection date
+                            if bin_type and following_collection and "No collection" not in following_collection and "download PDF" not in following_collection:
+                                try:
+                                    # Clean up the following collection text (remove PDF link text)
+                                    following_collection = following_collection.replace("download PDF", "").strip()
+                                    for date_fmt in ["%A, %d %B %Y", "%A %d %B %Y", "%d/%m/%Y", "%d-%m-%Y", "%Y-%m-%d"]:
+                                        try:
+                                            parsed_date = datetime.strptime(following_collection, date_fmt)
+                                            data["bins"].append({
+                                                "type": bin_type,
+                                                "collectionDate": parsed_date.strftime(date_format)
+                                            })
+                                            break
+                                        except ValueError:
+                                            continue
+                                except:
+                                    continue
+            
+            return data
+            
+        finally:
+            driver.quit()
@@ -20,88 +20,99 @@ def parse_data(self, page: str, **kwargs) -> dict:
             data = {"bins": []}
             user_paon = kwargs.get("paon")
             user_postcode = kwargs.get("postcode")
+            user_uprn = kwargs.get("uprn")
             web_driver = kwargs.get("web_driver")
             headless = kwargs.get("headless")
             check_paon(user_paon)
             check_postcode(user_postcode)
 
             # Create Selenium webdriver
             driver = create_webdriver(web_driver, headless, None, __name__)
-            driver.get(
-                "https://iapp.itouchvision.com/iappcollectionday/collection-day/?uuid=FA353FC74600CBE61BE409534D00A8EC09BDA3AC&lang=en"
+            driver.get(kwargs.get("url"))
+
+            # Click "Check now" button
+            check_now_button = WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable((By.XPATH, "//a[contains(text(), 'Check now')]"))
             )
+            check_now_button.click()
 
             # Wait for the postcode field to appear then populate it
             inputElement_postcode = WebDriverWait(driver, 10).until(
                 EC.presence_of_element_located((By.ID, "postcodeSearch"))
             )
             inputElement_postcode.send_keys(user_postcode)
 
-            # Click search button
-            findAddress = WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located(
-                    (By.XPATH, '//button[@class="govuk-button mt-4"]')
-                )
+            # Click Find button
+            find_button = WebDriverWait(driver, 10).until(
+                EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Find')]"))
             )
-            findAddress.click()
+            find_button.click()
 
-            # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
-            WebDriverWait(driver, 10).until(
-                EC.element_to_be_clickable(
-                    (
-                        By.XPATH,
-                        "//select[@id='addressSelect']//option[contains(., '"
-                        + user_paon
-                        + "')]",
-                    )
+            # Wait for the address dropdown and select by UPRN
+            if user_uprn:
+                address_option = WebDriverWait(driver, 10).until(
+                    EC.element_to_be_clickable((By.XPATH, f"//option[@value='{user_uprn}']"))
                 )
-            ).click()
-
-            # Wait for the collections table to appear
-            WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located(
-                    (
-                        By.XPATH,
-                        '//div[@class="ant-row d-flex justify-content-between mb-4 mt-2 css-2rgkd4"]',
+                address_option.click()
+            else:
+                # Fallback to selecting by address text
+                address_option = WebDriverWait(driver, 10).until(
+                    EC.element_to_be_clickable(
+                        (By.XPATH, f"//select[@id='addressSelect']//option[contains(., '{user_paon}')]")
                     )
                 )
-            )
-
-            soup = BeautifulSoup(driver.page_source, features="html.parser")
+                address_option.click()
 
-            recyclingcalendar = soup.find(
-                "div",
-                {
-                    "class": "ant-row d-flex justify-content-between mb-4 mt-2 css-2rgkd4"
-                },
-            )
+            # Wait a moment for the page to update after address selection
+            import time
+            time.sleep(2)
 
-            rows = recyclingcalendar.find_all(
-                "div",
-                {
-                    "class": "ant-col ant-col-xs-12 ant-col-sm-12 ant-col-md-12 ant-col-lg-12 ant-col-xl-12 css-2rgkd4"
-                },
-            )
+            # Wait for collection information to appear - try multiple possible selectors
+            try:
+                WebDriverWait(driver, 15).until(
+                    EC.presence_of_element_located((By.XPATH, "//h2[contains(text(), 'Your next collections')]"))
+                )
+            except:
+                # Alternative wait for collection data structure
+                WebDriverWait(driver, 10).until(
+                    EC.presence_of_element_located((By.XPATH, "//div[contains(@class, 'ant-row') and contains(@class, 'd-flex')]//h3[@class='text-white']"))
+                )
 
+            soup = BeautifulSoup(driver.page_source, features="html.parser")
+            
+            # Find all collection items with the specific structure - try multiple class patterns
+            collection_items = soup.find_all("div", class_=lambda x: x and "ant-col" in x and "ant-col-xs-12" in x)
+            if not collection_items:
+                # Fallback to finding items by structure
+                collection_items = soup.find_all("div", class_=lambda x: x and "p-2" in x and "d-flex" in x and "flex-column" in x)
+            
             current_year = datetime.now().year
             current_month = datetime.now().month
 
-            for row in rows:
-                BinType = row.find("h3").text
-                collectiondate = datetime.strptime(
-                    row.find("div", {"class": "text-white fw-bold"}).text,
-                    "%A %d %B",
-                )
-                if (current_month > 10) and (collectiondate.month < 3):
-                    collectiondate = collectiondate.replace(year=(current_year + 1))
-                else:
-                    collectiondate = collectiondate.replace(year=current_year)
-
-                dict_data = {
-                    "type": BinType,
-                    "collectionDate": collectiondate.strftime("%d/%m/%Y"),
-                }
-                data["bins"].append(dict_data)
+            for item in collection_items:
+                # Extract bin type from h3 element
+                bin_type_elem = item.find("h3", class_="text-white")
+                # Extract date from div with specific classes
+                date_elem = item.find("div", class_="text-white fw-bold")
+                
+                if bin_type_elem and date_elem:
+                    bin_type = bin_type_elem.get_text().strip()
+                    date_text = date_elem.get_text().strip()
+                    
+                    try:
+                        collection_date = datetime.strptime(date_text, "%A %d %B")
+                        if (current_month > 10) and (collection_date.month < 3):
+                            collection_date = collection_date.replace(year=(current_year + 1))
+                        else:
+                            collection_date = collection_date.replace(year=current_year)
+
+                        dict_data = {
+                            "type": bin_type,
+                            "collectionDate": collection_date.strftime("%d/%m/%Y"),
+                        }
+                        data["bins"].append(dict_data)
+                    except ValueError:
+                        continue
 
         except Exception as e:
             # Here you can log the exception if needed