1
1
import time
2
+ import datetime
2
3
4
+ from datetime import datetime
3
5
from bs4 import BeautifulSoup
4
6
from selenium .common .exceptions import TimeoutException
5
7
from selenium .webdriver .common .by import By
8
+ from selenium .webdriver .common .keys import Keys
6
9
from selenium .webdriver .support import expected_conditions as EC
7
- from selenium .webdriver .support .ui import WebDriverWait
10
+ from selenium .webdriver .support .ui import Select , WebDriverWait
8
11
9
12
from uk_bin_collection .uk_bin_collection .common import *
10
13
from uk_bin_collection .uk_bin_collection .get_bin_data import AbstractGetBinDataClass
11
14
12
- # import the wonderful Beautiful Soup and the URL grabber
13
-
14
-
15
15
class CouncilClass (AbstractGetBinDataClass ):
16
16
"""
17
17
Concrete classes have to implement all abstract operations of the
@@ -30,16 +30,18 @@ def extract_styles(self, style_str: str) -> dict:
30
30
def parse_data (self , page : str , ** kwargs ) -> dict :
31
31
driver = None
32
32
try :
33
- page = "https://www .northumberland.gov.uk/Waste/Household-waste/Household-bin-collections/Bin-Calendars.aspx "
33
+ page = "https://bincollection .northumberland.gov.uk/postcode "
34
34
35
35
data = {"bins" : []}
36
36
37
- user_paon = kwargs .get ("paon" )
38
37
user_postcode = kwargs .get ("postcode" )
38
+ user_uprn = kwargs .get ("uprn" )
39
+
40
+ check_postcode (user_postcode )
41
+ check_uprn (user_uprn )
42
+
39
43
web_driver = kwargs .get ("web_driver" )
40
44
headless = kwargs .get ("headless" )
41
- check_paon (user_paon )
42
- check_postcode (user_postcode )
43
45
44
46
# Create Selenium webdriver
45
47
driver = create_webdriver (web_driver , headless , None , __name__ )
@@ -49,106 +51,89 @@ def parse_data(self, page: str, **kwargs) -> dict:
49
51
wait = WebDriverWait (driver , 20 )
50
52
51
53
# Wait for and click cookie button
52
- cookie_button = wait .until (
53
- EC .element_to_be_clickable ((By .ID , "ccc-notify-accept" ))
54
- )
55
- cookie_button .click ()
56
-
57
- # Wait for and find house number input
58
- inputElement_hn = wait .until (
59
- EC .presence_of_element_located (
60
- (
61
- By .ID ,
62
- "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtHouse" ,
63
- )
54
+ try :
55
+ cookie_button = wait .until (
56
+ EC .element_to_be_clickable ((By .CLASS_NAME , "accept-all" ))
64
57
)
65
- )
58
+ cookie_button .click ()
59
+ except TimeoutException :
60
+ print ("Cookie banner not found, continuing..." )
66
61
67
62
# Wait for and find postcode input
68
63
inputElement_pc = wait .until (
69
64
EC .presence_of_element_located (
70
- (
71
- By .ID ,
72
- "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtPostcode" ,
73
- )
65
+ (By .ID , "postcode" )
74
66
)
75
67
)
76
68
77
- # Enter details
69
+ # Enter postcode and submit
78
70
inputElement_pc .send_keys (user_postcode )
79
- inputElement_hn .send_keys (user_paon )
71
+ inputElement_pc .send_keys (Keys .ENTER )
72
+
73
+ # Wait for the address dropdown (element with id "address") to be present
74
+ selectElement_address = wait .until (
75
+ EC .presence_of_element_located (
76
+ (By .ID , "address" )
77
+ )
78
+ )
79
+
80
+ dropdown = Select (selectElement_address )
81
+ dropdown .select_by_value (user_uprn )
80
82
81
- # Click lookup button and wait for results
82
- lookup_button = wait .until (
83
+ # Click submit button and wait for results
84
+ submit_button = wait .until (
83
85
EC .element_to_be_clickable (
84
- (
85
- By .ID ,
86
- "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_butLookup" ,
87
- )
86
+ (By .CLASS_NAME , "govuk-button" )
88
87
)
89
88
)
90
- lookup_button .click ()
89
+ submit_button .click ()
91
90
92
91
# Wait for results to load
93
92
route_summary = wait .until (
94
93
EC .presence_of_element_located (
95
- (
96
- By .ID ,
97
- "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary" ,
98
- )
94
+ (By .CLASS_NAME , "govuk-table" )
99
95
)
100
96
)
101
97
98
+ now = datetime .now ()
99
+ current_month = now .month
100
+ current_year = now .year
101
+
102
102
# Get page source after everything has loaded
103
103
soup = BeautifulSoup (driver .page_source , features = "html.parser" )
104
104
105
- # Work out which bins can be collected for this address. Glass bins are only on some houses due to pilot programme.
106
- bins_collected = list (
107
- map (
108
- str .strip ,
109
- soup .find (
110
- "span" ,
111
- id = "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary" ,
112
- )
113
- .text .replace ("Routes found: " , "" )
114
- .split ("," ),
105
+ # From the table, find all rows:
106
+ # - cell 1 is the date in format eg. 9 September (so no year value 🥲)
107
+ # - cell 2 is the day name, not useful
108
+ # - cell 3 is the bin type eg. "General waste", "Recycling", "Garden waste"
109
+ rows = soup .find_all ("tr" , class_ = "govuk-table__row" )
110
+
111
+ for row in rows :
112
+ bin_type = row .find_all ("td" )[- 1 ].text .strip ()
113
+
114
+ collection_date_string = row .find ('th' ).text .strip ()
115
+
116
+ # the day is sometimes written "22nd" instead of "22"; NOTE(review): isdigit() only keeps purely numeric tokens, so an ordinal suffix is not converted to an int here - confirm
117
+ collection_date_day = [int (i ) for i in collection_date_string .split (' ' ).split () if i .isdigit ()]
118
+ collection_date_month_name = collection_date_string .split (' ' )[1 ]
119
+
120
+ # if we are currently in Oct, Nov, or Dec and the collection month is Jan, Feb, or Mar, assume it's next year
121
+ if (current_month >= 10 ) and (collection_date_month_name in ["January" , "February" , "March" ]):
122
+ collection_date_year = current_year + 1
123
+ else :
124
+ collection_date_year = current_year
125
+
126
+ collection_date = time .strptime (
127
+ f"{ collection_date_day [0 ]} { collection_date_month_name } { collection_date_year } " , "%d %B %Y"
115
128
)
116
- )
117
129
118
- # Get the background colour for each of them...
119
- bins_by_colours = dict ()
120
- for bin in bins_collected :
121
- if "(but no dates found)" in bin :
122
- continue
123
- style_str = soup .find ("span" , string = bin )["style" ]
124
- bin_colour = self .extract_styles (style_str )["background-color" ].upper ()
125
- bins_by_colours [bin_colour ] = bin
126
-
127
- # Work through the tables gathering the dates, if the cell has a background colour - match it to the bin type.
128
- calander_tables = soup .find_all ("table" , title = "Calendar" )
129
- for table in calander_tables :
130
- # Get month and year
131
- # First row in table is the header
132
- rows = table .find_all ("tr" )
133
- month_and_year = (
134
- rows [0 ].find ("table" , class_ = "calCtrlTitle" ).find ("td" ).string
130
+ # Add it to the data
131
+ data ["bins" ].append (
132
+ {
133
+ "type" : bin_type ,
134
+ "collectionDate" : time .strftime (date_format , collection_date ),
135
+ }
135
136
)
136
- bin_days = table .find_all ("td" , class_ = "calCtrlDay" )
137
- for day in bin_days :
138
- day_styles = self .extract_styles (day ["style" ])
139
- if "background-color" in day_styles :
140
- colour = day_styles ["background-color" ].upper ()
141
- date = time .strptime (
142
- f"{ day .string } { month_and_year } " , "%d %B %Y"
143
- )
144
-
145
- # Add it to the data
146
- data ["bins" ].append (
147
- {
148
- "type" : bins_by_colours [colour ],
149
- "collectionDate" : time .strftime (date_format , date ),
150
- }
151
- )
152
137
except Exception as e :
153
138
# Here you can log the exception if needed
154
139
print (f"An error occurred: { e } " )
0 commit comments