1
- import time
2
1
import re
3
2
from datetime import datetime
4
3
7
6
from selenium .webdriver .common .keys import Keys
8
7
from selenium .webdriver .support import expected_conditions as EC
9
8
from selenium .webdriver .support .ui import Select , WebDriverWait
9
+ from selenium .common .exceptions import TimeoutException , NoSuchElementException
10
10
11
11
from uk_bin_collection .uk_bin_collection .common import *
12
12
from uk_bin_collection .uk_bin_collection .get_bin_data import AbstractGetBinDataClass
@@ -27,56 +27,95 @@ def parse_data(self, page: str, **kwargs) -> dict:
27
27
headless = kwargs .get ("headless" )
28
28
web_driver = kwargs .get ("web_driver" )
29
29
driver = create_webdriver (web_driver , headless , None , __name__ )
30
- page = "https://www.angus.gov.uk/bins_litter_and_recycling/bin_collection_days"
31
-
32
- driver .get (page )
30
+
31
+ driver .get ("https://www.angus.gov.uk/bins_litter_and_recycling/bin_collection_days" )
33
32
34
- wait = WebDriverWait (driver , 10 )
35
- accept_cookies_button = wait .until (
36
- EC .element_to_be_clickable ((By .ID , "ccc-recommended-settings" ))
37
- )
38
- accept_cookies_button .click ()
33
+ wait = WebDriverWait (driver , 20 )
34
+
35
+ # Accept cookies if present
36
+ try :
37
+ accept_cookies_button = wait .until (
38
+ EC .element_to_be_clickable ((By .ID , "ccc-recommended-settings" ))
39
+ )
40
+ accept_cookies_button .click ()
41
+ except TimeoutException :
42
+ print ("Cookie banner not found, continuing..." )
39
43
44
+ # Click on "Find bin collection days" link
40
45
find_your_collection_button = wait .until (
41
46
EC .element_to_be_clickable (
42
- (By .XPATH , "/html/body/div[2]/div[2]/div/div/section/div[2]/div/article/div/div/p[2]/a " )
47
+ (By .XPATH , "//a[contains(text(), 'Find bin collection days') or contains(@href, 'collection')] " )
43
48
)
44
49
)
45
50
find_your_collection_button .click ()
46
51
52
+ # Wait for iframe to be present and switch to it
47
53
iframe = wait .until (EC .presence_of_element_located ((By .ID , "fillform-frame-1" )))
48
54
driver .switch_to .frame (iframe )
49
55
50
- postcode_input = wait .until (EC .presence_of_element_located ((By .ID , "searchString" )))
51
- postcode_input .send_keys (user_postcode + Keys .TAB + Keys .ENTER )
52
-
53
- time .sleep (15 )
54
-
55
- select_elem = wait .until (EC .presence_of_element_located ((By .ID , "customerAddress" )))
56
- WebDriverWait (driver , 10 ).until (
57
- lambda d : len (select_elem .find_elements (By .TAG_NAME , "option" )) > 1
58
- )
59
- dropdown = Select (select_elem )
56
+ # Handle banner/modal if present
57
+ try :
58
+ close_button = wait .until (EC .element_to_be_clickable ((By .TAG_NAME , "button" )))
59
+ if close_button .text .strip ().lower () in ['close' , 'dismiss' , 'ok' ]:
60
+ close_button .click ()
61
+ except TimeoutException :
62
+ pass
63
+
64
+ # Wait for postcode input to be clickable
65
+ postcode_input = wait .until (EC .element_to_be_clickable ((By .ID , "searchString" )))
66
+ postcode_input .clear ()
67
+ postcode_input .send_keys (user_postcode )
68
+
69
+ # Find and click the search button
70
+ try :
71
+ submit_btn = driver .find_element (By .XPATH , "//button[contains(text(), 'Search')]" )
72
+ submit_btn .click ()
73
+ except :
74
+ try :
75
+ submit_btn = driver .find_element (By .XPATH , "//input[@type='submit']" )
76
+ submit_btn .click ()
77
+ except :
78
+ postcode_input .send_keys (Keys .TAB )
79
+ postcode_input .send_keys (Keys .ENTER )
80
+
81
+ # Wait for address dropdown to be present
82
+ address_dropdown = wait .until (EC .presence_of_element_located ((By .ID , "customerAddress" )))
83
+
84
+ # Wait for dropdown options to populate with extended timeout
85
+ try :
86
+ WebDriverWait (driver , 30 ).until (
87
+ lambda d : len (d .find_element (By .ID , "customerAddress" ).find_elements (By .TAG_NAME , "option" )) > 1
88
+ )
89
+ except TimeoutException :
90
+ options = address_dropdown .find_elements (By .TAG_NAME , "option" )
91
+ raise ValueError (f"Dropdown only has { len (options )} options after 30s wait" )
92
+
93
+ # Select the UPRN from dropdown
94
+ dropdown = Select (address_dropdown )
60
95
dropdown .select_by_value (user_uprn )
61
96
62
- time .sleep (10 )
63
-
97
+ # Wait for results to appear
64
98
wait .until (
65
99
EC .presence_of_element_located (
66
- (By .CSS_SELECTOR , "span.fieldInput.content.html.non-input" ))
100
+ (By .CSS_SELECTOR , "span.fieldInput.content.html.non-input" )
101
+ )
67
102
)
103
+
104
+ # Wait additional time for JavaScript to populate the data
105
+ import time
106
+ time .sleep (15 ) # Wait 15 seconds for dynamic content to load
68
107
108
+ # Parse the results
69
109
soup = BeautifulSoup (driver .page_source , "html.parser" )
70
110
bin_data = {"bins" : []}
71
111
current_date = datetime .now ()
72
112
current_formatted_date = None
73
113
74
114
spans = soup .select ("span.fieldInput.content.html.non-input" )
75
- print (f"Found { len (spans )} bin info spans." )
76
115
77
116
for i , span in enumerate (spans ):
78
117
try :
79
- # Look for any non-empty <u> tag recursively
118
+ # Look for date in <u> tags
80
119
date_tag = next (
81
120
(u for u in span .find_all ("u" ) if u and u .text .strip ()),
82
121
None
@@ -93,22 +132,15 @@ def parse_data(self, page: str, **kwargs) -> dict:
93
132
if parsed_date .date () < current_date .date ():
94
133
parsed_date = parsed_date .replace (year = current_date .year + 1 )
95
134
current_formatted_date = parsed_date .strftime ("%d/%m/%Y" )
96
- print (f"[{ i } ] Parsed date: { current_formatted_date } " )
97
- except ValueError as ve :
98
- print (f"[{ i } ] Could not parse date: '{ full_date_str } ' - { ve } " )
135
+ except ValueError :
99
136
continue
100
- else :
101
- print (f"[{ i } ] No date tag found, using last valid date: { current_formatted_date } " )
102
-
103
- if not current_formatted_date :
104
- print (f"[{ i } ] No current date to associate bin type with — skipping." )
105
- continue
106
137
107
- if not bin_type_tag or not bin_type_tag .text .strip ():
108
- print (f"[{ i } ] No bin type found — skipping." )
138
+ if not current_formatted_date or not bin_type_tag :
109
139
continue
110
140
111
141
bin_type = bin_type_tag .text .strip ()
142
+ if not bin_type :
143
+ continue
112
144
113
145
# Optional seasonal override
114
146
try :
@@ -118,25 +150,16 @@ def parse_data(self, page: str, **kwargs) -> dict:
118
150
except Exception :
119
151
pass
120
152
121
- print (f"[{ i } ] Found bin: { bin_type } on { current_formatted_date } " )
122
-
123
153
bin_data ["bins" ].append ({
124
154
"type" : bin_type ,
125
155
"collectionDate" : current_formatted_date
126
156
})
127
157
128
- except Exception as inner_e :
129
- print (f"[{ i } ] Skipping span due to error: { inner_e } " )
130
- continue
131
-
132
- except Exception as inner_e :
133
- print (f"Skipping span due to error: { inner_e } " )
158
+ except Exception :
134
159
continue
135
160
136
161
if not bin_data ["bins" ]:
137
162
raise ValueError ("No bin data found." )
138
-
139
- print (bin_data )
140
163
141
164
return bin_data
142
165
0 commit comments