- import time
-
import requests
from bs4 import BeautifulSoup
@@ -17,76 +15,81 @@ class CouncilClass(AbstractGetBinDataClass):
    def parse_data(self, page: str, **kwargs) -> dict:

-        user_uprn = kwargs.get("uprn")
-        check_uprn(user_uprn)
+        user_postcode = kwargs.get("postcode")
+        user_paon = kwargs.get("paon")
+        check_postcode(user_postcode)
+        check_paon(user_paon)

        bindata = {"bins": []}

-        API_URL = "https://maps.norwich.gov.uk/arcgis/rest/services/MyNorwich/PropertyDetails/FeatureServer/2/query"
-
-        params = {
-            "f": "json",
-            "where": f"UPRN='{user_uprn}' or UPRN='0{user_uprn}'",
-            "returnGeometry": "true",
-            "spatialRel": "esriSpatialRelIntersects",
-            "geometryType": "esriGeometryPolygon",
-            "inSR": "4326",
-            "outFields": "*",
-            "outSR": "4326",
-            "resultRecordCount": "1000",
+        URI = "https://bnr-wrp.whitespacews.com/"
+
+        session = requests.Session()
+
+        # get the link from the first page, as it carries some kind of unique hash
+        r = session.get(
+            URI,
+        )
+        r.raise_for_status()
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        alink = soup.find("a", text="View my collections")
+
+        if alink is None:
+            raise Exception("Initial page did not load correctly")
+
+        # replace 'seq' in the query string to skip the next step
+        nextpageurl = alink["href"].replace("seq=1", "seq=2")
+
+        data = {
+            "address_name_number": user_paon,
+            "address_postcode": user_postcode,
        }
-        r = requests.get(API_URL, params=params)
-
-        data = r.json()
-        data = data["features"][0]["attributes"]["WasteCollectionHtml"]
-        soup = BeautifulSoup(data, "html.parser")
-
-        alternateCheck = soup.find("p")
-        if alternateCheck.text.__contains__("alternate"):
-            alternateCheck = True
-        else:
-            alternateCheck = False
-
-        strong = soup.find_all("strong")
-        collections = []
-
-        if alternateCheck:
-            bin_types = strong[2].text.strip().replace(".", "").split(" and ")
-            for bin in bin_types:
-                collections.append(
-                    (
-                        bin.capitalize(),
-                        datetime.strptime(strong[1].text.strip(), date_format),
-                    )
-                )
-
-        else:
-            p_tag = soup.find_all("p")
-            i = 1
-            for p in p_tag:
-                bin_types = (
-                    p.text.split("Your ")[1].split(" is collected")[0].split(" and ")
-                )
-                for bin in bin_types:
-                    collections.append(
-                        (
-                            bin.capitalize(),
-                            datetime.strptime(strong[1].text.strip(), date_format),
-                        )
-                    )
-                i += 2
-
-        if len(strong) > 3:
-            collections.append(
-                ("Garden", datetime.strptime(strong[4].text.strip(), date_format))
-            )
-
-        ordered_data = sorted(collections, key=lambda x: x[1])
-        for item in ordered_data:
+        # get the list of matching addresses
+        r = session.post(nextpageurl, data)
+        r.raise_for_status()
+
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        # take the first address (if too few argument values are supplied, this may not be the right address)
+        alink = soup.find("div", id="property_list").find("a")
+
+        if alink is None:
+            raise Exception("Address not found")
+
+        nextpageurl = URI + alink["href"]
+
+        # get the collection page
+        r = session.get(
+            nextpageurl,
+        )
+        r.raise_for_status()
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        if soup.find("span", id="waste-hint"):
+            raise Exception("No scheduled services at this address")
+
+        # NB: the portal's markup uses non-standard <u1> elements, not <ul>
+        u1s = soup.find("section", id="scheduled-collections").find_all("u1")
+
+        for u1 in u1s:
+            lis = u1.find_all("li", recursive=False)
+
+            # the collection date sits in the second <li>, the bin type in the third
+            date = lis[1].text.replace("\n", "")
+            bin_type = lis[2].text.replace("\n", "")
+
            dict_data = {
-                "type": item[0] + " bin",
-                "collectionDate": item[1].strftime(date_format),
+                "type": bin_type,
+                "collectionDate": datetime.strptime(
+                    date,
+                    "%d/%m/%Y",
+                ).strftime(date_format),
            }
            bindata["bins"].append(dict_data)

+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+        )
+
        return bindata
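
A note on the `find_all("u1")` lookup above: it does not appear to be a typo for `ul`. The code deliberately targets `<u1>` elements, and BeautifulSoup's `html.parser` keeps unknown tags in the tree as ordinary elements, so they can be queried like anything else. A minimal, self-contained illustration of that behaviour (the sample markup here is invented, not taken from the portal):

```python
from bs4 import BeautifulSoup

# html.parser does not discard unknown tags such as <u1>,
# so the scraper can treat them like any other element
html = "<u1><li>Service</li><li>01/01/2025</li><li>Refuse</li></u1>"
soup = BeautifulSoup(html, features="html.parser")
lis = soup.find("u1").find_all("li", recursive=False)
print(lis[1].text, lis[2].text)  # 01/01/2025 Refuse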
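
For a quick smoke test of the change: the kwargs contract moves from `uprn` to `postcode` plus `paon`, and the returned dates are already normalised from the portal's DD/MM/YYYY strings into the project's `date_format`. A hedged sketch of exercising the reworked method directly; the address values are placeholders, and `CouncilClass` and `date_format` are assumed to come from this module and the project's common helpers, so this only runs in that context:

```python
# hypothetical smoke test; postcode and paon below are placeholders
council = CouncilClass()
result = council.parse_data("", postcode="NR13 1AA", paon="1")
for entry in result["bins"]:
    print(entry["collectionDate"], entry["type"])
```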