1
1
import time
2
+ import datetime
2
3
4
+ from datetime import datetime
3
5
from bs4 import BeautifulSoup
4
6
from selenium .common .exceptions import TimeoutException
5
7
from selenium .webdriver .common .by import By
8
+ from selenium .webdriver .common .keys import Keys
6
9
from selenium .webdriver .support import expected_conditions as EC
7
- from selenium .webdriver .support .ui import WebDriverWait
10
+ from selenium .webdriver .support .ui import Select , WebDriverWait
8
11
9
12
from uk_bin_collection .uk_bin_collection .common import *
10
13
from uk_bin_collection .uk_bin_collection .get_bin_data import AbstractGetBinDataClass
11
14
12
- # import the wonderful Beautiful Soup and the URL grabber
13
-
14
-
15
15
class CouncilClass (AbstractGetBinDataClass ):
16
16
"""
17
17
Concrete classes have to implement all abstract operations of the
@@ -30,16 +30,18 @@ def extract_styles(self, style_str: str) -> dict:
30
30
def parse_data (self , page : str , ** kwargs ) -> dict :
31
31
driver = None
32
32
try :
33
- page = "https://www .northumberland.gov.uk/Waste/Household-waste/Household-bin-collections/Bin-Calendars.aspx "
33
+ page = "https://bincollection .northumberland.gov.uk/postcode "
34
34
35
35
data = {"bins" : []}
36
36
37
- user_paon = kwargs .get ("paon" )
38
37
user_postcode = kwargs .get ("postcode" )
38
+ user_uprn = kwargs .get ("uprn" )
39
+
40
+ check_postcode (user_postcode )
41
+ check_uprn (user_uprn )
42
+
39
43
web_driver = kwargs .get ("web_driver" )
40
44
headless = kwargs .get ("headless" )
41
- check_paon (user_paon )
42
- check_postcode (user_postcode )
43
45
44
46
# Create Selenium webdriver
45
47
driver = create_webdriver (web_driver , headless , None , __name__ )
@@ -50,105 +52,87 @@ def parse_data(self, page: str, **kwargs) -> dict:
50
52
51
53
# Wait for and click cookie button
52
54
cookie_button = wait .until (
53
- EC .element_to_be_clickable ((By .ID , "ccc-notify-accept" ))
55
+ EC .element_to_be_clickable (
56
+ (By .CLASS_NAME , "accept-all" )
57
+ )
54
58
)
55
59
cookie_button .click ()
56
60
57
- # Wait for and find house number input
58
- inputElement_hn = wait .until (
61
+ # Wait for and find postcode input
62
+ inputElement_pc = wait .until (
59
63
EC .presence_of_element_located (
60
- (
61
- By .ID ,
62
- "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtHouse" ,
63
- )
64
+ (By .ID , "postcode" )
64
65
)
65
66
)
66
67
67
- # Wait for and find postcode input
68
- inputElement_pc = wait .until (
68
+ # Enter postcode and submit
69
+ inputElement_pc .send_keys (user_postcode )
70
+ inputElement_pc .send_keys (Keys .ENTER )
71
+
72
+ # Wait for and find the address dropdown
73
+ selectElement_address = wait .until (
69
74
EC .presence_of_element_located (
70
- (
71
- By .ID ,
72
- "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_txtPostcode" ,
73
- )
75
+ (By .ID , "address" )
74
76
)
75
77
)
76
78
77
- # Enter details
78
- inputElement_pc .send_keys (user_postcode )
79
- inputElement_hn .send_keys (user_paon )
79
+ dropdown = Select (selectElement_address )
80
+ dropdown .select_by_value (user_uprn )
80
81
81
- # Click lookup button and wait for results
82
- lookup_button = wait .until (
82
+ # Click submit button and wait for results
83
+ submit_button = wait .until (
83
84
EC .element_to_be_clickable (
84
- (
85
- By .ID ,
86
- "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_NCCAddressLookup_butLookup" ,
87
- )
85
+ (By .CLASS_NAME , "govuk-button" )
88
86
)
89
87
)
90
- lookup_button .click ()
88
+ submit_button .click ()
91
89
92
90
# Wait for results to load
93
91
route_summary = wait .until (
94
92
EC .presence_of_element_located (
95
- (
96
- By .ID ,
97
- "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary" ,
98
- )
93
+ (By .CLASS_NAME , "govuk-table" )
99
94
)
100
95
)
101
96
97
+ now = datetime .now ()
98
+ current_month = now .month
99
+ current_year = now .year
100
+
102
101
# Get page source after everything has loaded
103
102
soup = BeautifulSoup (driver .page_source , features = "html.parser" )
104
103
105
- # Work out which bins can be collected for this address. Glass bins are only on some houses due to pilot programme.
106
- bins_collected = list (
107
- map (
108
- str .strip ,
109
- soup .find (
110
- "span" ,
111
- id = "p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary" ,
112
- )
113
- .text .replace ("Routes found: " , "" )
114
- .split ("," ),
104
+ # From the table, find all rows:
105
+ # - cell 1 is the date in format eg. 9 September (so no year value 🥲)
106
+ # - cell 2 is the day name, not useful
107
+ # - cell 3 is the bin type eg. "General waste", "Recycling", "Garden waste"
108
+ rows = soup .find ("tbody" , class_ = "govuk-table__body" ).find_all ("tr" , class_ = "govuk-table__row" )
109
+
110
+ for row in rows :
111
+ bin_type = row .find_all ("td" )[- 1 ].text .strip ()
112
+
113
+ collection_date_string = row .find ('th' ).text .strip ()
114
+
115
+ # The day is sometimes (but not always) written with an ordinal suffix, e.g. "22nd" instead of "22", so keep only the digit characters
116
+ collection_date_day = "" .join ([i for i in list (collection_date_string .split (" " )[0 ]) if i .isdigit ()])
117
+ collection_date_month_name = collection_date_string .split (" " )[1 ]
118
+
119
+ # If we are currently in Oct, Nov, or Dec and the collection month is Jan, Feb, or Mar, assume it's next year
120
+ if (current_month >= 10 ) and (collection_date_month_name in ["January" , "February" , "March" ]):
121
+ collection_date_year = current_year + 1
122
+ else :
123
+ collection_date_year = current_year
124
+
125
+ collection_date = time .strptime (
126
+ f"{ collection_date_day } { collection_date_month_name } { collection_date_year } " , "%d %B %Y"
115
127
)
116
- )
117
128
118
- # Get the background colour for each of them...
119
- bins_by_colours = dict ()
120
- for bin in bins_collected :
121
- if "(but no dates found)" in bin :
122
- continue
123
- style_str = soup .find ("span" , string = bin )["style" ]
124
- bin_colour = self .extract_styles (style_str )["background-color" ].upper ()
125
- bins_by_colours [bin_colour ] = bin
126
-
127
- # Work through the tables gathering the dates, if the cell has a background colour - match it to the bin type.
128
- calander_tables = soup .find_all ("table" , title = "Calendar" )
129
- for table in calander_tables :
130
- # Get month and year
131
- # First row in table is the header
132
- rows = table .find_all ("tr" )
133
- month_and_year = (
134
- rows [0 ].find ("table" , class_ = "calCtrlTitle" ).find ("td" ).string
129
+ # Add it to the data
130
+ data ["bins" ].append (
131
+ {
132
+ "type" : bin_type ,
133
+ "collectionDate" : time .strftime (date_format , collection_date ),
134
+ }
135
135
)
136
- bin_days = table .find_all ("td" , class_ = "calCtrlDay" )
137
- for day in bin_days :
138
- day_styles = self .extract_styles (day ["style" ])
139
- if "background-color" in day_styles :
140
- colour = day_styles ["background-color" ].upper ()
141
- date = time .strptime (
142
- f"{ day .string } { month_and_year } " , "%d %B %Y"
143
- )
144
-
145
- # Add it to the data
146
- data ["bins" ].append (
147
- {
148
- "type" : bins_by_colours [colour ],
149
- "collectionDate" : time .strftime (date_format , date ),
150
- }
151
- )
152
136
except Exception as e :
153
137
# Here you can log the exception if needed
154
138
print (f"An error occurred: { e } " )
0 commit comments