1
1
import json
2
- from datetime import timedelta
3
-
4
- import requests
2
+ import time
3
+ from datetime import datetime
5
4
from bs4 import BeautifulSoup
5
+ from selenium .webdriver .common .by import By
6
+ from selenium .webdriver .support .ui import WebDriverWait , Select
7
+ from selenium .webdriver .support import expected_conditions as EC
8
+ from selenium .webdriver .common .keys import Keys
6
9
from uk_bin_collection .uk_bin_collection .common import *
7
10
from uk_bin_collection .uk_bin_collection .get_bin_data import AbstractGetBinDataClass
8
11
9
12
10
- # import the wonderful Beautiful Soup and the URL grabber
11
13
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    # Date formats the BCP portal has been observed rendering collection
    # dates in; tried in order until one parses.
    _DATE_FORMATS = (
        "%A, %d %B %Y",
        "%A %d %B %Y",
        "%d/%m/%Y",
        "%d-%m-%Y",
        "%Y-%m-%d",
    )

    @classmethod
    def _parse_collection_date(cls, text: str):
        """Return a datetime parsed from *text* using the first matching
        known portal format, or None when no format matches."""
        for date_fmt in cls._DATE_FORMATS:
            try:
                return datetime.strptime(text, date_fmt)
            except ValueError:
                continue
        return None

    def _append_collection(self, data: dict, bin_type: str, cell_text: str) -> None:
        """Parse one results-table cell and, when it holds a valid date,
        append a {"type", "collectionDate"} entry to data["bins"]."""
        if not bin_type or not cell_text:
            return
        # The 'following collection' cell can carry a trailing PDF link
        # caption. BUG FIX: the original skipped any cell containing
        # 'download PDF' *before* stripping it, so the strip was dead code
        # and those dates were silently dropped; strip first, then check.
        cell_text = cell_text.replace("download PDF", "").strip()
        if not cell_text or "No collection" in cell_text:
            return
        parsed_date = self._parse_collection_date(cell_text)
        if parsed_date is not None:
            data["bins"].append(
                {
                    "type": bin_type,
                    # date_format comes from the star-imported project
                    # common module — presumably the collector's canonical
                    # output format.
                    "collectionDate": parsed_date.strftime(date_format),
                }
            )

    def parse_data(self, page: str, **kwargs) -> dict:
        """Scrape upcoming bin collections for a BCP Council address.

        Keyword Args:
            postcode: postcode to search for (required).
            paon: house number/name used to pick the address (required).
            web_driver: optional remote webdriver URL.
            headless: run the browser headless (default True).

        Returns:
            dict with a "bins" list of {"type", "collectionDate"} entries.
        """
        postcode = kwargs.get("postcode")
        house_number = kwargs.get("paon")
        web_driver = kwargs.get("web_driver")
        headless = kwargs.get("headless", True)

        check_postcode(postcode)
        check_paon(house_number)

        driver = create_webdriver(web_driver, headless=headless)

        try:
            driver.get(
                "https://bcpportal.bcpcouncil.gov.uk/checkyourbincollection/"
            )

            # Dismiss the cookie banner when it appears; it is not always
            # shown, so a timeout here is not an error.
            try:
                cookie_button = WebDriverWait(driver, 5).until(
                    EC.element_to_be_clickable(
                        (By.XPATH, "//button[contains(text(), 'Okay')]")
                    )
                )
                cookie_button.click()
            except Exception:
                pass

            # Enter the postcode and trigger the address search.
            postcode_input = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "input[type='text']")
                )
            )
            postcode_input.clear()
            postcode_input.send_keys(postcode)

            search_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.ID, "searchAddress"))
            )
            search_button.click()

            # Wait for the address dropdown, then pick the option matching
            # the caller-supplied house number/name.
            # BUG FIX: this XPath previously hard-coded 'HARBOUR VIEW ROAD'
            # (in an f-string with no placeholder), so every lookup selected
            # the same street regardless of the validated paon kwarg.
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.TAG_NAME, "select"))
            )
            address_option = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable(
                    (By.XPATH, f"//option[contains(text(), '{house_number}')]")
                )
            )
            address_option.click()

            # Wait for the results to render: first any cell mentioning
            # 'collection', then the table enclosing it.
            WebDriverWait(driver, 15).until(
                EC.presence_of_element_located(
                    (
                        By.XPATH,
                        "//td[contains(text(), 'collection')] | //th[contains(text(), 'collection')]",
                    )
                )
            )
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (
                        By.XPATH,
                        "//td[contains(text(), 'collection')]/ancestor::table | //th[contains(text(), 'collection')]/ancestor::table",
                    )
                )
            )

            # Parse the rendered page with BeautifulSoup rather than
            # driving Selenium element-by-element.
            soup = BeautifulSoup(driver.page_source, "html.parser")
            data = {"bins": []}

            collection_cell = soup.find(
                ["td", "th"],
                string=lambda text: text and "collection" in text.lower(),
            )
            if collection_cell:
                table = collection_cell.find_parent("table")
                if table:
                    for row in table.find_all("tr")[1:]:  # skip header row
                        cells = row.find_all(["td", "th"])
                        # Need at least a bin type and one collection date.
                        if len(cells) < 2:
                            continue
                        bin_type = cells[0].get_text(strip=True)
                        # cells[1] = next collection; cells[2] = following
                        # collection (when present).
                        for cell in cells[1:3]:
                            self._append_collection(
                                data, bin_type, cell.get_text(strip=True)
                            )

            return data

        finally:
            # Always release the browser, even on scrape failure.
            driver.quit()
0 commit comments