
Commit 620868e

Update NorwichCityCouncil.py

Fixed for new URL

1 parent a2541f1 · commit 620868e

1 file changed: NorwichCityCouncil.py (67 additions, 66 deletions)
@@ -1,5 +1,3 @@
-import time
-
 import requests
 from bs4 import BeautifulSoup
 
@@ -17,76 +15,79 @@ class CouncilClass(AbstractGetBinDataClass):
 
     def parse_data(self, page: str, **kwargs) -> dict:
 
-        user_uprn = kwargs.get("uprn")
-        check_uprn(user_uprn)
+        user_postcode = kwargs.get("postcode")
+        user_paon = kwargs.get("paon")
+        check_postcode(user_postcode)
+        check_paon(user_paon)
         bindata = {"bins": []}
 
-        API_URL = "https://maps.norwich.gov.uk/arcgis/rest/services/MyNorwich/PropertyDetails/FeatureServer/2/query"
-
-        params = {
-            "f": "json",
-            "where": f"UPRN='{user_uprn}' or UPRN='0{user_uprn}'",
-            "returnGeometry": "true",
-            "spatialRel": "esriSpatialRelIntersects",
-            "geometryType": "esriGeometryPolygon",
-            "inSR": "4326",
-            "outFields": "*",
-            "outSR": "4326",
-            "resultRecordCount": "1000",
+        URI = "https://bnr-wrp.whitespacews.com/"
+
+        session = requests.Session()
+
+        # get link from first page as it has some kind of unique hash
+        r = session.get(
+            URI,
+        )
+        r.raise_for_status()
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        alink = soup.find("a", text="View my collections")
+
+        if alink is None:
+            raise Exception("Initial page did not load correctly")
+
+        # replace 'seq' query string to skip next step
+        nextpageurl = alink["href"].replace("seq=1", "seq=2")
+
+        data = {
+            "address_name_number": user_paon,
+            "address_postcode": user_postcode,
         }
 
-        r = requests.get(API_URL, params=params)
-
-        data = r.json()
-        data = data["features"][0]["attributes"]["WasteCollectionHtml"]
-        soup = BeautifulSoup(data, "html.parser")
-
-        alternateCheck = soup.find("p")
-        if alternateCheck.text.__contains__("alternate"):
-            alternateCheck = True
-        else:
-            alternateCheck = False
-
-        strong = soup.find_all("strong")
-        collections = []
-
-        if alternateCheck:
-            bin_types = strong[2].text.strip().replace(".", "").split(" and ")
-            for bin in bin_types:
-                collections.append(
-                    (
-                        bin.capitalize(),
-                        datetime.strptime(strong[1].text.strip(), date_format),
-                    )
-                )
-
-        else:
-            p_tag = soup.find_all("p")
-            i = 1
-            for p in p_tag:
-                bin_types = (
-                    p.text.split("Your ")[1].split(" is collected")[0].split(" and ")
-                )
-                for bin in bin_types:
-                    collections.append(
-                        (
-                            bin.capitalize(),
-                            datetime.strptime(strong[1].text.strip(), date_format),
-                        )
-                    )
-                i += 2
-
-        if len(strong) > 3:
-            collections.append(
-                ("Garden", datetime.strptime(strong[4].text.strip(), date_format))
-            )
-
-        ordered_data = sorted(collections, key=lambda x: x[1])
-        for item in ordered_data:
+        # get list of addresses
+        r = session.post(nextpageurl, data)
+        r.raise_for_status()
+
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        # get first address (if you don't enter enough argument values this won't find the right address)
+        alink = soup.find("div", id="property_list").find("a")
+
+        if alink is None:
+            raise Exception("Address not found")
+
+        nextpageurl = URI + alink["href"]
+
+        # get collection page
+        r = session.get(
+            nextpageurl,
+        )
+        r.raise_for_status()
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        if soup.find("span", id="waste-hint"):
+            raise Exception("No scheduled services at this address")
+
+        u1s = soup.find("section", id="scheduled-collections").find_all("u1")
+
+        for u1 in u1s:
+            lis = u1.find_all("li", recursive=False)
+
+            date = lis[1].text.replace("\n", "")
+            bin_type = lis[2].text.replace("\n", "")
+
             dict_data = {
-                "type": item[0] + " bin",
-                "collectionDate": item[1].strftime(date_format),
+                "type": bin_type,
+                "collectionDate": datetime.strptime(
+                    date,
+                    "%d/%m/%Y",
+                ).strftime(date_format),
             }
             bindata["bins"].append(dict_data)
 
+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+        )
+
        return bindata
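
For reference, here is a minimal standalone sketch of the new lookup flow, outside the repo's council-class plumbing. The portal URL, form field names, element ids, tag names, and date format are all taken from the diff above; `fetch_collections` is a name invented for this sketch, and the postcode and house number passed at the bottom are placeholder values.

import requests
from bs4 import BeautifulSoup
from datetime import datetime

URI = "https://bnr-wrp.whitespacews.com/"


def fetch_collections(postcode: str, paon: str) -> list:
    session = requests.Session()

    # Step 1: scrape the "View my collections" link from the landing page;
    # its href carries a session-specific hash, so it cannot be hard-coded.
    r = session.get(URI)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, features="html.parser")
    alink = soup.find("a", text="View my collections")
    if alink is None:
        raise Exception("Initial page did not load correctly")

    # Step 2: bump seq=1 to seq=2 to skip an intermediate step, then post
    # the house name/number and postcode to get the list of addresses.
    r = session.post(
        alink["href"].replace("seq=1", "seq=2"),
        {"address_name_number": paon, "address_postcode": postcode},
    )
    r.raise_for_status()

    # Step 3: follow the first match in the property list to the
    # collection page.
    soup = BeautifulSoup(r.text, features="html.parser")
    alink = soup.find("div", id="property_list").find("a")
    if alink is None:
        raise Exception("Address not found")
    r = session.get(URI + alink["href"])
    r.raise_for_status()

    # Step 4: pull date and bin type out of each scheduled collection
    # (the "u1" tag name mirrors the markup the committed code targets).
    soup = BeautifulSoup(r.text, features="html.parser")
    if soup.find("span", id="waste-hint"):
        raise Exception("No scheduled services at this address")
    collections = []
    for u1 in soup.find("section", id="scheduled-collections").find_all("u1"):
        lis = u1.find_all("li", recursive=False)
        collections.append(
            {
                "type": lis[2].text.replace("\n", ""),
                "collectionDate": datetime.strptime(
                    lis[1].text.replace("\n", ""), "%d/%m/%Y"
                ).strftime("%d/%m/%Y"),
            }
        )
    collections.sort(
        key=lambda x: datetime.strptime(x["collectionDate"], "%d/%m/%Y")
    )
    return collections


print(fetch_collections("NR1 1AA", "1"))  # placeholder address values

Reusing one requests.Session for every request matters here: the portal threads that per-session hash through its links, so fetching the steps with separate sessions would break the flow. The real class formats dates with the repo-wide date_format constant; the sketch simply uses "%d/%m/%Y" throughout.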
