@@ -30,11 +30,31 @@ def parse_data(self, page: str, **kwargs) -> dict:
30
30
check_paon (user_paon )
31
31
headless = kwargs .get ("headless" )
32
32
web_driver = kwargs .get ("web_driver" )
33
- driver = create_webdriver (web_driver , headless , None , __name__ )
33
+ # Use a realistic user agent to help bypass Cloudflare
34
+ user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
35
+ driver = create_webdriver (web_driver , headless , user_agent , __name__ )
34
36
page = "https://www.enfield.gov.uk/services/rubbish-and-recycling/find-my-collection-day"
35
37
driver .get (page )
36
38
37
- time .sleep (5 )
39
+ # Wait for Cloudflare challenge to complete
40
+ print ("Waiting for page to load (Cloudflare check)..." )
41
+ max_attempts = 3
42
+ for attempt in range (max_attempts ):
43
+ try :
44
+ WebDriverWait (driver , 60 ).until (
45
+ lambda d : "Just a moment" not in d .title and d .title != "" and len (d .find_elements (By .TAG_NAME , "input" )) > 1
46
+ )
47
+ print (f"Page loaded: { driver .title } " )
48
+ break
49
+ except :
50
+ print (f"Attempt { attempt + 1 } : Timeout waiting for page load. Current title: { driver .title } " )
51
+ if attempt < max_attempts - 1 :
52
+ time .sleep (10 )
53
+ driver .refresh ()
54
+ else :
55
+ print ("Failed to bypass Cloudflare after multiple attempts" )
56
+
57
+ time .sleep (8 )
38
58
39
59
try :
40
60
accept_cookies = WebDriverWait (driver , timeout = 10 ).until (
@@ -47,23 +67,73 @@ def parse_data(self, page: str, **kwargs) -> dict:
47
67
)
48
68
pass
49
69
50
- postcode_input = WebDriverWait (driver , 10 ).until (
51
- EC .presence_of_element_located (
52
- (By .CSS_SELECTOR , '[aria-label="Enter your address"]' )
53
- )
54
- )
70
+ # Check for multiple iframes and find the correct one
71
+ try :
72
+ iframes = driver .find_elements (By .TAG_NAME , "iframe" )
73
+
74
+ # Try each iframe to find the one with the bin collection form
75
+ for i , iframe in enumerate (iframes ):
76
+ try :
77
+ driver .switch_to .frame (iframe )
78
+
79
+ # Check if this iframe has the postcode input
80
+ time .sleep (2 )
81
+ inputs = driver .find_elements (By .TAG_NAME , "input" )
82
+
83
+ # Look for address-related inputs
84
+ for inp in inputs :
85
+ aria_label = inp .get_attribute ('aria-label' ) or ''
86
+ placeholder = inp .get_attribute ('placeholder' ) or ''
87
+ if 'address' in aria_label .lower () or 'postcode' in placeholder .lower ():
88
+ break
89
+ else :
90
+ # This iframe doesn't have the form, try the next one
91
+ driver .switch_to .default_content ()
92
+ continue
93
+
94
+ # Found the right iframe, break out of the loop
95
+ break
96
+ except Exception as e :
97
+ driver .switch_to .default_content ()
98
+ continue
99
+ else :
100
+ # No suitable iframe found, stay in main content
101
+ driver .switch_to .default_content ()
102
+ except Exception as e :
103
+ pass
104
+
105
+ # Try multiple selectors for the postcode input
106
+ postcode_input = None
107
+ selectors = [
108
+ '[aria-label="Enter your address"]' ,
109
+ 'input[placeholder*="postcode"]' ,
110
+ 'input[placeholder*="address"]' ,
111
+ 'input[type="text"]'
112
+ ]
113
+
114
+ for selector in selectors :
115
+ try :
116
+ postcode_input = WebDriverWait (driver , 5 ).until (
117
+ EC .element_to_be_clickable ((By .CSS_SELECTOR , selector ))
118
+ )
119
+ break
120
+ except :
121
+ continue
122
+
123
+ if not postcode_input :
124
+ raise ValueError ("Could not find postcode input field" )
55
125
56
126
postcode_input .send_keys (user_postcode )
57
127
58
128
find_address_button = WebDriverWait (driver , 10 ).until (
59
- EC .presence_of_element_located ((By .ID , "submitButton0" ))
129
+ EC .element_to_be_clickable ((By .ID , "submitButton0" ))
60
130
)
61
131
find_address_button .click ()
62
132
63
133
time .sleep (15 )
64
134
# Wait for address box to be visible
65
- select_address_input = WebDriverWait (driver , 10 ).until (
66
- EC .presence_of_element_located (
135
+ select_address_input = WebDriverWait (driver , 15 ).until (
136
+ EC .element_to_be_clickable (
67
137
(
68
138
By .CSS_SELECTOR ,
69
139
'[aria-label="Select full address"]' ,
0 commit comments