1
1
import time
2
- from datetime import datetime
2
+ import re
3
+ from datetime import datetime , timedelta
3
4
4
5
from bs4 import BeautifulSoup
5
6
from selenium .webdriver .common .by import By
11
12
from uk_bin_collection .uk_bin_collection .common import *
12
13
from uk_bin_collection .uk_bin_collection .get_bin_data import AbstractGetBinDataClass
13
14
14
- # import the wonderful Beautiful Soup and the URL grabber
15
-
16
15
17
16
class CouncilClass (AbstractGetBinDataClass ):
18
17
"""
@@ -30,89 +29,214 @@ def parse_data(self, page: str, **kwargs) -> dict:
30
29
31
30
house_number = kwargs .get ("paon" )
32
31
postcode = kwargs .get ("postcode" )
33
- full_address = f"{ house_number } , { postcode } "
32
+ # Use house_number as full address since it contains the complete address
33
+ full_address = house_number if house_number else f"{ house_number } , { postcode } "
34
34
web_driver = kwargs .get ("web_driver" )
35
35
headless = kwargs .get ("headless" )
36
36
37
37
# Create Selenium webdriver
38
38
driver = create_webdriver (web_driver , headless , None , __name__ )
39
39
driver .get (page )
40
40
41
- # If you bang in the house number (or property name) and postcode in the box it should find your property
41
+ # Wait for page to load completely
42
42
wait = WebDriverWait (driver , 60 )
43
- address_entry_field = wait .until (
44
- EC .element_to_be_clickable ((By .XPATH , '//*[@id="combobox-input-22"]' ))
45
- )
46
-
47
- address_entry_field .send_keys (str (full_address ))
48
-
49
- address_entry_field = wait .until (
50
- EC .element_to_be_clickable ((By .XPATH , '//*[@id="combobox-input-22"]' ))
51
- )
52
- address_entry_field .click ()
53
- address_entry_field .send_keys (Keys .BACKSPACE )
54
- address_entry_field .send_keys (str (full_address [len (full_address ) - 1 ]))
55
-
56
- first_found_address = wait .until (
57
- EC .element_to_be_clickable (
58
- (By .XPATH , '//*[@id="dropdown-element-22"]/ul' )
59
- )
60
- )
61
-
62
- first_found_address .click ()
63
- # Wait for the 'Select your property' dropdown to appear and select the first result
64
- next_btn = wait .until (
65
- EC .element_to_be_clickable ((By .XPATH , "//lightning-button/button" ))
66
- )
67
- next_btn .click ()
68
- bin_data = wait .until (
69
- EC .presence_of_element_located (
70
- (By .XPATH , "//span[contains(text(), 'Container')]" )
71
- )
72
- )
73
-
43
+
44
+ # Wait for the Salesforce Lightning page to be fully loaded
45
+ print ("Waiting for Salesforce Lightning components to load..." )
46
+ time .sleep (10 )
47
+
48
+ # Wait for the address input field to be present
49
+ try :
50
+ wait .until (EC .presence_of_element_located ((By .XPATH , "//label[contains(text(), 'Enter your address')]" )))
51
+ print ("Address label found" )
52
+ time .sleep (5 ) # Additional wait for the input field to be ready
53
+ except Exception as e :
54
+ print (f"Address label not found: { e } " )
55
+
56
+ # Find the address input field using the label
57
+ try :
58
+ address_entry_field = driver .find_element (By .XPATH , "//label[contains(text(), 'Enter your address')]/following-sibling::*//input" )
59
+ print ("Found address input field using label xpath" )
60
+ except Exception as e :
61
+ print (f"Could not find address input field: { e } " )
62
+ raise Exception ("Could not find address input field" )
63
+
64
+ # Clear any existing text and enter the address
65
+ try :
66
+ address_entry_field .clear ()
67
+ address_entry_field .send_keys (str (full_address ))
68
+ print (f"Entered address: { full_address } " )
69
+ except Exception as e :
70
+ print (f"Error entering address: { e } " )
71
+ raise
72
+
73
+ # Click the input field again to trigger the dropdown
74
+ try :
75
+ address_entry_field .click ()
76
+ print ("Clicked input field to trigger dropdown" )
77
+ time .sleep (3 ) # Wait for dropdown to appear
78
+ except Exception as e :
79
+ print (f"Error clicking input field: { e } " )
80
+
81
+ # Wait for and click the dropdown option
82
+ try :
83
+ dropdown_wait = WebDriverWait (driver , 10 )
84
+ dropdown_option = dropdown_wait .until (EC .element_to_be_clickable ((By .XPATH , "//li[@role='presentation']" )))
85
+ dropdown_option .click ()
86
+ print ("Clicked dropdown option" )
87
+ time .sleep (2 )
88
+ except Exception as e :
89
+ print (f"Error clicking dropdown option: { e } " )
90
+ raise
91
+
92
+ # Find and click the Next button
93
+ try :
94
+ next_wait = WebDriverWait (driver , 10 )
95
+ next_button = next_wait .until (EC .element_to_be_clickable ((By .XPATH , "//button[contains(text(), 'Next')]" )))
96
+ next_button .click ()
97
+ print ("Clicked Next button" )
98
+ time .sleep (5 ) # Wait for the bin collection data to load
99
+ except Exception as e :
100
+ print (f"Error clicking Next button: { e } " )
101
+ raise
102
+
103
+ # Wait for the bin collection data table to load
104
+ try :
105
+ table_wait = WebDriverWait (driver , 15 )
106
+ table_wait .until (EC .presence_of_element_located ((By .XPATH , "//span[contains(text(), 'Collection Day')]" )))
107
+ print ("Bin collection data table loaded" )
108
+ time .sleep (3 )
109
+ except Exception as e :
110
+ print (f"Bin collection table not found: { e } " )
111
+
74
112
soup = BeautifulSoup (driver .page_source , features = "html.parser" )
75
-
76
- rows = soup .find_all ("tr" , class_ = "slds-hint-parent" )
77
113
current_year = datetime .now ().year
78
114
115
+ # Try multiple approaches to find bin collection data
116
+ rows = []
117
+
118
+ # Try different table row selectors
119
+ table_selectors = [
120
+ "tr.slds-hint-parent" ,
121
+ "tr[class*='slds']" ,
122
+ "table tr" ,
123
+ ".slds-table tr" ,
124
+ "tbody tr"
125
+ ]
126
+
127
+ for selector in table_selectors :
128
+ rows = soup .select (selector )
129
+ if rows :
130
+ break
131
+
132
+ # If no table rows found, try to find any elements containing collection info
133
+ if not rows :
134
+ # Look for any elements that might contain bin collection information
135
+ collection_elements = soup .find_all (text = re .compile (r'(bin|collection|waste|recycling)' , re .I ))
136
+ if collection_elements :
137
+ # Try to extract information from the surrounding elements
138
+ for element in collection_elements [:10 ]: # Limit to first 10 matches
139
+ parent = element .parent
140
+ if parent :
141
+ text = parent .get_text ().strip ()
142
+ if text and len (text ) > 10 : # Only consider substantial text
143
+ # Try to extract date patterns
144
+ date_patterns = re .findall (r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b\d{1,2}\s+\w+\s+\d{4}\b' , text )
145
+ if date_patterns :
146
+ data ["bins" ].append ({
147
+ "type" : "General Collection" ,
148
+ "collectionDate" : date_patterns [0 ]
149
+ })
150
+ break
151
+
152
+ # Process table rows if found
79
153
for row in rows :
80
- columns = row .find_all ("td" )
81
- if columns :
82
- container_type = row .find ("th" ).text .strip ()
83
- if columns [0 ].get_text () == "Today" :
84
- collection_day = datetime .now ().strftime ("%a, %d %B" )
85
- elif columns [0 ].get_text () == "Tomorrow" :
86
- collection_day = (datetime .now () + timedelta (days = 1 )).strftime (
87
- "%a, %d %B"
88
- )
89
- else :
90
- collection_day = re .sub (
91
- r"[^a-zA-Z0-9,\s]" , "" , columns [0 ].get_text ()
92
- ).strip ()
154
+ try :
155
+ columns = row .find_all (["td" , "th" ])
156
+ if len (columns ) >= 2 :
157
+ # Try to identify container type and date
158
+ container_type = "Unknown"
159
+ collection_date = ""
160
+
161
+ # Look for header cell (th) for container type
162
+ th_element = row .find ("th" )
163
+ if th_element :
164
+ container_type = th_element .get_text ().strip ()
165
+ elif columns :
166
+ # If no th, use first column as type
167
+ container_type = columns [0 ].get_text ().strip ()
168
+
169
+ # Look for date in subsequent columns
170
+ for col in columns [1 :] if th_element else columns [1 :]:
171
+ col_text = col .get_text ().strip ()
172
+ if col_text :
173
+ if col_text .lower () == "today" :
174
+ collection_date = datetime .now ().strftime ("%d/%m/%Y" )
175
+ break
176
+ elif col_text .lower () == "tomorrow" :
177
+ collection_date = (datetime .now () + timedelta (days = 1 )).strftime ("%d/%m/%Y" )
178
+ break
179
+ else :
180
+ # Try to parse various date formats
181
+ try :
182
+ # Clean the text
183
+ clean_text = re .sub (r"[^a-zA-Z0-9,\s/-]" , "" , col_text ).strip ()
184
+
185
+ # Try different date parsing approaches
186
+ date_formats = [
187
+ "%a, %d %B" ,
188
+ "%d %B %Y" ,
189
+ "%d/%m/%Y" ,
190
+ "%d-%m-%Y" ,
191
+ "%B %d, %Y"
192
+ ]
193
+
194
+ for fmt in date_formats :
195
+ try :
196
+ parsed_date = datetime .strptime (clean_text , fmt )
197
+ if fmt == "%a, %d %B" : # Add year if missing
198
+ if parsed_date .replace (year = current_year ) < datetime .now ():
199
+ parsed_date = parsed_date .replace (year = current_year + 1 )
200
+ else :
201
+ parsed_date = parsed_date .replace (year = current_year )
202
+ collection_date = parsed_date .strftime ("%d/%m/%Y" )
203
+ break
204
+ except ValueError :
205
+ continue
206
+
207
+ if collection_date :
208
+ break
209
+ except Exception :
210
+ continue
211
+
212
+ # Add to data if we have both type and date
213
+ if container_type and collection_date and container_type .lower () != "unknown" :
214
+ data ["bins" ].append ({
215
+ "type" : container_type ,
216
+ "collectionDate" : collection_date
217
+ })
218
+ except Exception as e :
219
+ print (f"Error processing row: { e } " )
220
+ continue
221
+
222
+ # If no data found, add a debug entry
223
+ if not data ["bins" ]:
224
+ print ("No bin collection data found. Page source:" )
225
+ print (driver .page_source [:1000 ]) # Print first 1000 chars for debugging
93
226
94
- # Parse the date from the string
95
- parsed_date = datetime .strptime (collection_day , "%a, %d %B" )
96
- if parsed_date < datetime (
97
- parsed_date .year , parsed_date .month , parsed_date .day
98
- ):
99
- parsed_date = parsed_date .replace (year = current_year + 1 )
100
- else :
101
- parsed_date = parsed_date .replace (year = current_year )
102
- # Format the date as %d/%m/%Y
103
- formatted_date = parsed_date .strftime ("%d/%m/%Y" )
104
-
105
- # Add the bin type and collection date to the 'data' dictionary
106
- data ["bins" ].append (
107
- {"type" : container_type , "collectionDate" : formatted_date }
108
- )
109
227
except Exception as e :
110
228
# Here you can log the exception if needed
111
229
print (f"An error occurred: { e } " )
230
+ print (f"Full address used: { full_address } " )
231
+ print (f"Page URL: { page } " )
232
+ # Add some debug information
233
+ if driver :
234
+ print (f"Current page title: { driver .title } " )
235
+ print (f"Current URL: { driver .current_url } " )
112
236
# Optionally, re-raise the exception if you want it to propagate
113
237
raise
114
238
finally :
115
239
# This block ensures that the driver is closed regardless of an exception
116
240
if driver :
117
241
driver .quit ()
118
- return data
242
+ return data
0 commit comments