Commit d5516f6

fix: #1570 - Slough Borough Council
1 parent 08bf867

File tree

1 file changed (+39, -21 lines)


uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py

Lines changed: 39 additions & 21 deletions
@@ -1,15 +1,18 @@
-import time
 import re
-import requests
+import time
 from datetime import datetime
+
+import requests
 from bs4 import BeautifulSoup
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import WebDriverWait
+
 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
 
+
 def get_street_from_postcode(postcode: str, api_key: str) -> str:
     url = "https://maps.googleapis.com/maps/api/geocode/json"
     params = {"address": postcode, "key": api_key}
@@ -25,6 +28,7 @@ def get_street_from_postcode(postcode: str, api_key: str) -> str:
 
     raise ValueError("No street (route) found in the response.")
 
+
 class CouncilClass(AbstractGetBinDataClass):
     def parse_data(self, page: str, **kwargs) -> dict:
         driver = None
@@ -37,10 +41,10 @@ def parse_data(self, page: str, **kwargs) -> dict:
 
             headless = kwargs.get("headless")
            web_driver = kwargs.get("web_driver")
-            driver = create_webdriver(web_driver, headless, None, __name__)
+            UserAgent = "Mozilla/5.0"
+            driver = create_webdriver(web_driver, headless, UserAgent, __name__)
             page = "https://www.slough.gov.uk/bin-collections"
             driver.get(page)
-
             # Accept cookies
             WebDriverWait(driver, 10).until(
                 EC.element_to_be_clickable((By.ID, "ccc-recommended-settings"))
@@ -50,14 +54,20 @@ def parse_data(self, page: str, **kwargs) -> dict:
             address_input = WebDriverWait(driver, 10).until(
                 EC.presence_of_element_located((By.ID, "keyword_directory25"))
             )
-            user_address = get_street_from_postcode(user_postcode, "AIzaSyBDLULT7EIlNtHerswPtfmL15Tt3Oc0bV8")
+            user_address = get_street_from_postcode(
+                user_postcode, "AIzaSyBDLULT7EIlNtHerswPtfmL15Tt3Oc0bV8"
+            )
             address_input.send_keys(user_address + Keys.ENTER)
 
             # Wait for address results to load
             WebDriverWait(driver, 10).until(
-                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "span.list__link-text"))
+                EC.presence_of_all_elements_located(
+                    (By.CSS_SELECTOR, "span.list__link-text")
+                )
+            )
+            span_elements = driver.find_elements(
+                By.CSS_SELECTOR, "span.list__link-text"
             )
-            span_elements = driver.find_elements(By.CSS_SELECTOR, "span.list__link-text")
 
             for span in span_elements:
                 if user_address.lower() in span.text.lower():
@@ -68,7 +78,9 @@ def parse_data(self, page: str, **kwargs) -> dict:
 
             # Wait for address detail page
             WebDriverWait(driver, 10).until(
-                EC.presence_of_element_located((By.CSS_SELECTOR, "section.site-content"))
+                EC.presence_of_element_located(
+                    (By.CSS_SELECTOR, "section.site-content")
+                )
             )
             soup = BeautifulSoup(driver.page_source, "html.parser")
 
@@ -86,28 +98,33 @@ def parse_data(self, page: str, **kwargs) -> dict:
                 bin_url = "https://www.slough.gov.uk" + bin_url
 
                 # Visit the child page
-                print(f"Navigating to {bin_url}")
+                # print(f"Navigating to {bin_url}")
                 driver.get(bin_url)
                 WebDriverWait(driver, 10).until(
-                    EC.presence_of_element_located((By.CSS_SELECTOR, "div.page-content"))
+                    EC.presence_of_element_located(
+                        (By.CSS_SELECTOR, "div.page-content")
+                    )
                 )
                 child_soup = BeautifulSoup(driver.page_source, "html.parser")
 
                 editor_div = child_soup.find("div", class_="editor")
                 if not editor_div:
-                    print("No editor div found on bin detail page.")
+                    # print("No editor div found on bin detail page.")
                     continue
 
                 ul = editor_div.find("ul")
                 if not ul:
-                    print("No <ul> with dates found in editor div.")
+                    # print("No <ul> with dates found in editor div.")
                    continue
 
                 for li in ul.find_all("li"):
                     raw_text = li.get_text(strip=True).replace(".", "")
 
-                    if "no collection" in raw_text.lower() or "no collections" in raw_text.lower():
-                        print(f"Ignoring non-collection note: {raw_text}")
+                    if (
+                        "no collection" in raw_text.lower()
+                        or "no collections" in raw_text.lower()
+                    ):
+                        # print(f"Ignoring non-collection note: {raw_text}")
                         continue
 
                     raw_date = raw_text
@@ -117,24 +134,25 @@ def parse_data(self, page: str, **kwargs) -> dict:
                     except ValueError:
                         raw_date_cleaned = raw_date.split("(")[0].strip()
                         try:
-                            parsed_date = datetime.strptime(raw_date_cleaned, "%d %B %Y")
+                            parsed_date = datetime.strptime(
+                                raw_date_cleaned, "%d %B %Y"
+                            )
                         except Exception:
                             print(f"Could not parse date: {raw_text}")
                             continue
 
                     formatted_date = parsed_date.strftime("%d/%m/%Y")
                     contains_date(formatted_date)
-                    bin_data["bins"].append({
-                        "type": bin_type,
-                        "collectionDate": formatted_date
-                    })
+                    bin_data["bins"].append(
+                        {"type": bin_type, "collectionDate": formatted_date}
+                    )
 
-                    print(f"Type: {bin_type}, Date: {formatted_date}")
+                    # print(f"Type: {bin_type}, Date: {formatted_date}")
 
         except Exception as e:
             print(f"An error occurred: {e}")
             raise
        finally:
             if driver:
                 driver.quit()
-        return bin_data
+        return bin_data

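The create_webdriver change in the third hunk swaps None for an explicit "Mozilla/5.0" user agent. Assuming the helper forwards that string to Chrome's user-agent switch (its internals are not part of this diff), the effect is roughly:

    from selenium import webdriver

    options = webdriver.ChromeOptions()
    # Assumed effect of the UserAgent argument: replace the default
    # HeadlessChrome token, which some sites filter out.
    options.add_argument("--user-agent=Mozilla/5.0")
    driver = webdriver.Chrome(options=options)

A generic UA string like this is a common workaround for sites that serve different markup, or nothing at all, to headless browsers.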
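The last hunk keeps the two-stage date parse: a first strptime attempt on the raw text (in context lines not shown here), then a fallback that strips any parenthesised suffix and retries with "%d %B %Y" before reformatting to "%d/%m/%Y". A worked example of the fallback path, using a hypothetical list-item text:

    from datetime import datetime

    raw_date = "14 July 2025 (garden waste)"  # hypothetical <li> text
    raw_date_cleaned = raw_date.split("(")[0].strip()  # -> "14 July 2025"
    parsed_date = datetime.strptime(raw_date_cleaned, "%d %B %Y")
    print(parsed_date.strftime("%d/%m/%Y"))  # -> 14/07/2025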