2
2
from selenium .webdriver .common .by import By
3
3
from selenium .webdriver .support import expected_conditions as EC
4
4
from selenium .webdriver .support .wait import WebDriverWait
5
+ from selenium .webdriver .support .ui import Select
5
6
6
7
from uk_bin_collection .uk_bin_collection .common import *
7
8
from uk_bin_collection .uk_bin_collection .get_bin_data import AbstractGetBinDataClass
8
9
9
10
10
- # import the wonderful Beautiful Soup and the URL grabber
11
11
class CouncilClass (AbstractGetBinDataClass ):
12
12
"""
13
13
Concrete classes have to implement all abstract operations of the
@@ -21,97 +21,102 @@ def parse_data(self, page: str, **kwargs) -> dict:
21
21
data = {"bins" : []}
22
22
user_paon = kwargs .get ("paon" )
23
23
user_postcode = kwargs .get ("postcode" )
24
+ user_uprn = kwargs .get ("uprn" )
24
25
web_driver = kwargs .get ("web_driver" )
25
26
headless = kwargs .get ("headless" )
26
- check_paon (user_paon )
27
27
check_postcode (user_postcode )
28
28
29
29
# Create Selenium webdriver
30
30
driver = create_webdriver (web_driver , headless , None , __name__ )
31
- driver .get (
32
- "https://eastrenfrewshire.gov.uk/article/1145/Bin-collection-days"
33
- )
31
+ driver .get ("https://eastrenfrewshire.gov.uk/bin-days" )
34
32
35
33
# Wait for the postcode field to appear then populate it
36
34
inputElement_postcode = WebDriverWait (driver , 30 ).until (
37
35
EC .presence_of_element_located (
38
- (By .ID , "RESIDUALWASTEV2_PAGE1_POSTCODE " )
36
+ (By .CSS_SELECTOR , "input[autocomplete='postal-code'] " )
39
37
)
40
38
)
41
39
inputElement_postcode .send_keys (user_postcode )
42
40
43
41
# Click search button
44
- findAddress = WebDriverWait (driver , 10 ).until (
45
- EC .presence_of_element_located (
46
- (By .ID , "RESIDUALWASTEV2_PAGE1_FIELD199_NEXT" )
47
- )
48
- )
49
- findAddress .click ()
50
-
51
- # Wait for the 'Select address' dropdown to appear and select option matching the house name/number
52
- WebDriverWait (driver , 10 ).until (
42
+ search_button = WebDriverWait (driver , 10 ).until (
53
43
EC .element_to_be_clickable (
54
- (
55
- By .XPATH ,
56
- "//select[@id='RESIDUALWASTEV2_PAGE2_UPRN']//option[contains(., '"
57
- + user_paon
58
- + "')]" ,
59
- )
44
+ (By .XPATH , "//button[text()='Search']" )
60
45
)
61
- ).click ()
46
+ )
47
+ search_button .click ()
62
48
63
- # Click search button
64
- findDates = WebDriverWait (driver , 10 ).until (
49
+ # Wait for the addresses dropdown to appear
50
+ addresses_select = WebDriverWait (driver , 10 ).until (
65
51
EC .presence_of_element_located (
66
- (By .ID , "RESIDUALWASTEV2_PAGE2_FIELD206_NEXT " )
52
+ (By .XPATH , "//label[text()='Addresses']/following-sibling::select " )
67
53
)
68
54
)
69
- findDates .click ()
55
+
56
+ # Select the appropriate address based on UPRN or house number
57
+ select = Select (addresses_select )
58
+ if user_uprn :
59
+ # Select by UPRN value
60
+ select .select_by_value (user_uprn )
61
+ elif user_paon :
62
+ # Select by house number/name in the text
63
+ for option in select .options :
64
+ if user_paon in option .text :
65
+ select .select_by_visible_text (option .text )
66
+ break
67
+ else :
68
+ # Select the first non-default option
69
+ select .select_by_index (1 )
70
+
71
+ # Click the "Find my collection dates" button
72
+ find_dates_button = WebDriverWait (driver , 10 ).until (
73
+ EC .element_to_be_clickable (
74
+ (By .XPATH , "//button[text()='Find my collection dates']" )
75
+ )
76
+ )
77
+ find_dates_button .click ()
70
78
71
- # Wait for the collections table to appear
79
+ # Wait for the results table to appear
72
80
WebDriverWait (driver , 10 ).until (
73
81
EC .presence_of_element_located (
74
- (By .ID , "RESIDUALWASTEV2_COLLECTIONDATES_DISPLAYBINCOLLECTIONINFO " )
82
+ (By .XPATH , "//th[text()='Bin Type'] " )
75
83
)
76
84
)
77
85
78
86
soup = BeautifulSoup (driver .page_source , features = "html.parser" )
79
- soup . prettify ()
80
-
81
- # Get collections div
82
- next_collection_div = soup . find ( "div" , { "id" : "yourNextCollection" })
83
-
84
- # Get next collection date
85
- next_collection_date = datetime . strptime (
86
- next_collection_div . find ( "span" , { "class" : "dueDate" })
87
- .get_text ()
88
- . strip (),
89
- "%d/%m/%Y" ,
90
- )
91
-
92
- # Get next collection bins
93
- next_collection_bin = next_collection_div . findAll (
94
- "span" , { "class" : "binColour" }
95
- )
96
-
97
- # Format results
98
- for row in next_collection_bin :
99
- dict_data = {
100
- "type" : row . get_text (). strip () ,
101
- "collectionDate" : next_collection_date . strftime ( "%d/%m/%Y" ) ,
102
- }
103
- data ["bins" ].append (dict_data )
87
+
88
+ # Find the table with bin collection data
89
+ table = soup . find ( "th" , string = "Bin Type" ). find_parent ( "table" )
90
+ rows = table . find_all ( "tr" )[ 1 :] # Skip header row
91
+
92
+ for row in rows :
93
+ cells = row . find_all ( "td" )
94
+ if len ( cells ) >= 3 :
95
+ date_cell = cells [ 0 ] .get_text (). strip ()
96
+ bin_type_cell = cells [ 2 ]
97
+
98
+ # Only process rows that have a date
99
+ if date_cell :
100
+ # Get all text content including line breaks
101
+ bin_type_text = bin_type_cell . get_text ( separator = ' \n ' ). strip ()
102
+
103
+ # Split multiple bin types that appear on separate lines
104
+ bin_types = [ bt . strip () for bt in bin_type_text . split ( ' \n ' ) if bt . strip ()]
105
+
106
+ for bin_type in bin_types :
107
+ dict_data = {
108
+ "type" : bin_type ,
109
+ "collectionDate" : date_cell ,
110
+ }
111
+ data ["bins" ].append (dict_data )
104
112
105
113
data ["bins" ].sort (
106
114
key = lambda x : datetime .strptime (x .get ("collectionDate" ), "%d/%m/%Y" )
107
115
)
108
116
except Exception as e :
109
- # Here you can log the exception if needed
110
117
print (f"An error occurred: { e } " )
111
- # Optionally, re-raise the exception if you want it to propagate
112
118
raise
113
119
finally :
114
- # This block ensures that the driver is closed regardless of an exception
115
120
if driver :
116
121
driver .quit ()
117
122
return data
0 commit comments