Skip to content

Commit 5cbc6dc

Browse files
committed
fix: Enfield and Broxbourne
1 parent 80c8b15 commit 5cbc6dc

File tree

2 files changed

+96
-11
lines changed

2 files changed

+96
-11
lines changed

uk_bin_collection/uk_bin_collection/councils/BroxbourneCouncil.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,25 @@ def parse_data(self, page: str, **kwargs) -> dict:
2222
check_postcode(user_postcode)
2323

2424
bindata = {"bins": []}
25-
driver = create_webdriver(web_driver, headless, None, __name__)
25+
# Use a realistic user agent to help bypass Cloudflare
26+
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
27+
driver = create_webdriver(web_driver, headless, user_agent, __name__)
2628

2729
try:
2830
driver.get("https://www.broxbourne.gov.uk/bin-collection-date")
31+
32+
# Wait for Cloudflare challenge to complete
33+
print("Waiting for page to load (Cloudflare check)...")
34+
try:
35+
WebDriverWait(driver, 45).until(
36+
lambda d: "Just a moment" not in d.title and d.title != "" and len(d.find_elements(By.TAG_NAME, "input")) > 0
37+
)
38+
print(f"Page loaded: {driver.title}")
39+
except:
40+
print(f"Timeout waiting for page load. Current title: {driver.title}")
41+
# Try to continue anyway
42+
pass
43+
2944
time.sleep(8)
3045

3146
# Handle cookie banner with multiple attempts

uk_bin_collection/uk_bin_collection/councils/EnfieldCouncil.py

Lines changed: 80 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,31 @@ def parse_data(self, page: str, **kwargs) -> dict:
3030
check_paon(user_paon)
3131
headless = kwargs.get("headless")
3232
web_driver = kwargs.get("web_driver")
33-
driver = create_webdriver(web_driver, headless, None, __name__)
33+
# Use a realistic user agent to help bypass Cloudflare
34+
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
35+
driver = create_webdriver(web_driver, headless, user_agent, __name__)
3436
page = "https://www.enfield.gov.uk/services/rubbish-and-recycling/find-my-collection-day"
3537
driver.get(page)
3638

37-
time.sleep(5)
39+
# Wait for Cloudflare challenge to complete
40+
print("Waiting for page to load (Cloudflare check)...")
41+
max_attempts = 3
42+
for attempt in range(max_attempts):
43+
try:
44+
WebDriverWait(driver, 60).until(
45+
lambda d: "Just a moment" not in d.title and d.title != "" and len(d.find_elements(By.TAG_NAME, "input")) > 1
46+
)
47+
print(f"Page loaded: {driver.title}")
48+
break
49+
except:
50+
print(f"Attempt {attempt + 1}: Timeout waiting for page load. Current title: {driver.title}")
51+
if attempt < max_attempts - 1:
52+
time.sleep(10)
53+
driver.refresh()
54+
else:
55+
print("Failed to bypass Cloudflare after multiple attempts")
56+
57+
time.sleep(8)
3858

3959
try:
4060
accept_cookies = WebDriverWait(driver, timeout=10).until(
@@ -47,23 +67,73 @@ def parse_data(self, page: str, **kwargs) -> dict:
4767
)
4868
pass
4969

50-
postcode_input = WebDriverWait(driver, 10).until(
51-
EC.presence_of_element_located(
52-
(By.CSS_SELECTOR, '[aria-label="Enter your address"]')
53-
)
54-
)
70+
# Check for multiple iframes and find the correct one
71+
try:
72+
iframes = driver.find_elements(By.TAG_NAME, "iframe")
73+
74+
# Try each iframe to find the one with the bin collection form
75+
for i, iframe in enumerate(iframes):
76+
try:
77+
driver.switch_to.frame(iframe)
78+
79+
# Check if this iframe has the postcode input
80+
time.sleep(2)
81+
inputs = driver.find_elements(By.TAG_NAME, "input")
82+
83+
# Look for address-related inputs
84+
for inp in inputs:
85+
aria_label = inp.get_attribute('aria-label') or ''
86+
placeholder = inp.get_attribute('placeholder') or ''
87+
if 'address' in aria_label.lower() or 'postcode' in placeholder.lower():
88+
break
89+
else:
90+
# This iframe doesn't have the form, try the next one
91+
driver.switch_to.default_content()
92+
continue
93+
94+
# Found the right iframe, break out of the loop
95+
break
96+
except Exception as e:
97+
driver.switch_to.default_content()
98+
continue
99+
else:
100+
# No suitable iframe found, stay in main content
101+
driver.switch_to.default_content()
102+
except Exception as e:
103+
pass
104+
105+
# Try multiple selectors for the postcode input
106+
postcode_input = None
107+
selectors = [
108+
'[aria-label="Enter your address"]',
109+
'input[placeholder*="postcode"]',
110+
'input[placeholder*="address"]',
111+
'input[type="text"]'
112+
]
113+
114+
for selector in selectors:
115+
try:
116+
postcode_input = WebDriverWait(driver, 5).until(
117+
EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
118+
)
119+
break
120+
except:
121+
continue
122+
123+
if not postcode_input:
124+
raise ValueError("Could not find postcode input field")
55125

56126
postcode_input.send_keys(user_postcode)
57127

58128
find_address_button = WebDriverWait(driver, 10).until(
59-
EC.presence_of_element_located((By.ID, "submitButton0"))
129+
EC.element_to_be_clickable((By.ID, "submitButton0"))
60130
)
61131
find_address_button.click()
62132

63133
time.sleep(15)
64134
# Wait for address box to be visible
65-
select_address_input = WebDriverWait(driver, 10).until(
66-
EC.presence_of_element_located(
135+
select_address_input = WebDriverWait(driver, 15).until(
136+
EC.element_to_be_clickable(
67137
(
68138
By.CSS_SELECTOR,
69139
'[aria-label="Select full address"]',

0 commit comments

Comments
 (0)