1
- import json
2
1
import time
3
- from datetime import datetime
4
- from bs4 import BeautifulSoup
5
- from selenium .webdriver .common .by import By
6
- from selenium .webdriver .support .ui import WebDriverWait , Select
7
- from selenium .webdriver .support import expected_conditions as EC
8
- from selenium .webdriver .common .keys import Keys
2
+
3
+ import requests
4
+ from dateutil .relativedelta import relativedelta
5
+
9
6
from uk_bin_collection .uk_bin_collection .common import *
10
7
from uk_bin_collection .uk_bin_collection .get_bin_data import AbstractGetBinDataClass
11
8
12
9
10
+ # Collection data is fetched from the council's API directly; no HTML scraping
13
11
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch bin collection dates for a BCP Council property.

        Queries the council's Azure Logic App endpoint by UPRN and returns
        the schedule as ``{"bins": [{"type": ..., "collectionDate": ...}]}``.

        :param page: unused; the data comes from the API, not a scraped page
        :param kwargs: must contain ``uprn`` (Unique Property Reference Number)
        :return: dict with a ``bins`` list of type/collectionDate entries
        :raises requests.HTTPError: if the API responds with an error status
        """
        uprn = kwargs.get("uprn")
        check_uprn(uprn)
        bindata = {"bins": []}

        API_URL = "https://prod-17.uksouth.logic.azure.com/workflows/58253d7b7d754447acf9fe5fcf76f493/triggers/manual/paths/invoke?api-version=2016-06-01&sp=%2Ftriggers%2Fmanual%2Frun&sv=1.0&sig=TAvYIUFj6dzaP90XQCm2ElY6Cd34ze05I3ba7LKTiBs"

        headers = {
            "Content-Type": "application/json",
            "Accept": "*/*",
            "User-Agent": "Mozilla/5.0",
            "Referer": "https://bcpportal.bcpcouncil.gov.uk/",
        }

        # Single request, so no session is needed. A timeout prevents the
        # collector from hanging indefinitely if the endpoint stops responding.
        response = requests.post(
            API_URL,
            json={"uprn": uprn},
            headers=headers,
            timeout=30,
        )
        response.raise_for_status()

        result = response.json()
        for row in result["data"]:
            bin_type = row["wasteContainerUsageTypeDescription"]
            # scheduleDateRange holds ISO-format (YYYY-MM-DD) date strings
            # — TODO confirm against a live API response.
            for collection_date in row["scheduleDateRange"]:
                bindata["bins"].append(
                    {
                        "type": bin_type,
                        "collectionDate": datetime.strptime(
                            collection_date, "%Y-%m-%d"
                        ).strftime(date_format),
                    }
                )

        return bindata
0 commit comments