Commit a53ed3b

Merge branch 'aug_25_release2' into buckinghamshire_api_fetch
2 parents 88cd8f5 + 321f0d1

13 files changed (+1297, -825 lines)

uk_bin_collection/tests/input.json

Lines changed: 1 addition & 1 deletion
@@ -253,7 +253,7 @@
         "postcode": "BL1 5PQ",
         "skip_get_url": true,
         "uprn": "100010886936",
-        "url": "https://carehomes.bolton.gov.uk/bins.aspx",
+        "url": "https://web.bolton.gov.uk/bins.aspx",
         "web_driver": "http://selenium:4444",
         "wiki_name": "Bolton",
         "wiki_note": "To get the UPRN, you will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search). Previously required a single field that was UPRN and full address; now requires UPRN and postcode as separate fields.",

uk_bin_collection/uk_bin_collection/councils/BoltonCouncil.py

Lines changed: 1 addition & 1 deletion
@@ -35,7 +35,7 @@ def parse_data(self, page: str, **kwargs) -> dict:
         data = {"bins": []}

         # Get our initial session running
-        page = "https://carehomes.bolton.gov.uk/bins.aspx"
+        page = "https://web.bolton.gov.uk/bins.aspx"

         driver = create_webdriver(web_driver, headless, None, __name__)
         driver.get(page)
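Note: both the test fixture and BoltonCouncil.py now point at web.bolton.gov.uk rather than carehomes.bolton.gov.uk. A minimal, hypothetical smoke test for the relocated endpoint (not part of the repository; the parser itself still drives the page through Selenium, so a plain GET only confirms the host is reachable):

import requests

# Hypothetical check: confirm the relocated Bolton endpoint answers before
# running the Selenium-based parser.
NEW_URL = "https://web.bolton.gov.uk/bins.aspx"
resp = requests.get(NEW_URL, timeout=10)
print(resp.status_code, resp.url)
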
Lines changed: 82 additions & 24 deletions
@@ -1,11 +1,13 @@
-import json
+import time
+
 import requests
-from datetime import datetime
+from dateutil.relativedelta import relativedelta

 from uk_bin_collection.uk_bin_collection.common import *
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


+# import the wonderful Beautiful Soup and the URL grabber
 class CouncilClass(AbstractGetBinDataClass):
     """
     Concrete classes have to implement all abstract operations of the
@@ -14,28 +16,84 @@ class CouncilClass(AbstractGetBinDataClass):
     """

     def parse_data(self, page: str, **kwargs) -> dict:
-        user_uprn = kwargs.get("uprn")
-        check_uprn(user_uprn)
+        # Make a BS4 object
+        uprn = kwargs.get("uprn")
+        # usrn = kwargs.get("paon")
+        check_uprn(uprn)
+        # check_usrn(usrn)
         bindata = {"bins": []}
-
-        # Make API request
-        api_url = f"https://east-herts.co.uk/api/services/{user_uprn}"
-        response = requests.get(api_url)
-        response.raise_for_status()
-
-        data = response.json()
-        today = datetime.now().date()
-
-        for service in data.get("services", []):
-            collection_date_str = service.get("collectionDate")
-            if collection_date_str:
-                collection_date = datetime.strptime(collection_date_str, "%Y-%m-%d").date()
-                # Only include future dates
-                if collection_date >= today:
-                    dict_data = {
-                        "type": service.get("binType", ""),
-                        "collectionDate": collection_date.strftime("%d/%m/%Y"),
+
+        # uprn = uprn.zfill(12)
+
+        SESSION_URL = "https://eastherts-self.achieveservice.com/authapi/isauthenticated?uri=https%253A%252F%252Feastherts-self.achieveservice.com%252FAchieveForms%252F%253Fmode%253Dfill%2526consentMessage%253Dyes%2526form_uri%253Dsandbox-publish%253A%252F%252FAF-Process-98782935-6101-4962-9a55-5923e76057b6%252FAF-Stage-dcd0ec18-dfb4-496a-a266-bd8fadaa28a7%252Fdefinition.json%2526process%253D1%2526process_uri%253Dsandbox-processes%253A%252F%252FAF-Process-98782935-6101-4962-9a55-5923e76057b6%2526process_id%253DAF-Process-98782935-6101-4962-9a55-5923e76057b6&hostname=eastherts-self.achieveservice.com&withCredentials=true"
+
+        API_URL = "https://eastherts-self.achieveservice.com/apibroker/runLookup"
+
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "*/*",
+            "User-Agent": "Mozilla/5.0",
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": "https://eastherts-self.achieveservice.com/fillform/?iframe_id=fillform-frame-1&db_id=",
+        }
+        s = requests.session()
+        r = s.get(SESSION_URL)
+        r.raise_for_status()
+        session_data = r.json()
+        sid = session_data["auth-session"]
+        params = {
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+
+        params = {
+            "id": "683d9ff0e299d",
+            "repeat_against": "",
+            "noRetry": "true",
+            "getOnlyTokens": "undefined",
+            "log_id": "",
+            "app_name": "AF-Renderer::Self",
+            # unix_timestamp
+            "_": str(int(time.time() * 1000)),
+            "sid": sid,
+        }
+
+        data = {
+            "formValues": {
+                "Collection Days": {
+                    "inputUPRN": {
+                        "value": uprn,
                     }
-                    bindata["bins"].append(dict_data)
-
+                },
+            }
+        }
+
+        r = s.post(API_URL, json=data, headers=headers, params=params)
+        r.raise_for_status()
+
+        data = r.json()
+        rows_data = data["integration"]["transformed"]["rows_data"]["0"]
+        if not isinstance(rows_data, dict):
+            raise ValueError("Invalid data returned from API")
+
+        # Extract each service's relevant details for the bin schedule
+        for key, value in rows_data.items():
+            if key.endswith("NextDate"):
+                BinType = key.replace("NextDate", "ServiceName")
+                for key2, value2 in rows_data.items():
+                    if key2 == BinType:
+                        BinType = value2
+                next_collection = datetime.strptime(
+                    remove_ordinal_indicator_from_date_string(value), "%A %d %B"
+                ).replace(year=datetime.now().year)
+                if datetime.now().month == 12 and next_collection.month == 1:
+                    next_collection = next_collection + relativedelta(years=1)
+
+                dict_data = {
+                    "type": BinType,
+                    "collectionDate": next_collection.strftime(date_format),
+                }
+                bindata["bins"].append(dict_data)
+
         return bindata
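The rewritten parser opens an achieveservice session, posts the UPRN to the runLookup broker, and then pairs every *NextDate field in rows_data with its *ServiceName counterpart, nudging the year forward when a January date is returned in December. A standalone sketch of that pairing logic using made-up keys and dates (only the NextDate/ServiceName suffix convention and the rollover rule come from the code above; strip_ordinal is a stand-in for remove_ordinal_indicator_from_date_string):

import re
from datetime import datetime

from dateutil.relativedelta import relativedelta

# Hypothetical payload: the real key names come from the council's lookup and
# are not shown in the diff; only the *NextDate / *ServiceName convention is.
rows_data = {
    "RefuseNextDate": "Friday 2nd January",
    "RefuseServiceName": "Refuse Collection Service",
    "RecyclingNextDate": "Monday 15th December",
    "RecyclingServiceName": "Recycling Collection Service",
}

def strip_ordinal(s):
    # stand-in for remove_ordinal_indicator_from_date_string()
    return re.sub(r"(\d{1,2})(st|nd|rd|th)", r"\1", s)

for key, value in rows_data.items():
    if key.endswith("NextDate"):
        service = rows_data.get(key.replace("NextDate", "ServiceName"), key)
        when = datetime.strptime(strip_ordinal(value), "%A %d %B").replace(
            year=datetime.now().year
        )
        # A January date seen in December belongs to next year.
        if datetime.now().month == 12 and when.month == 1:
            when += relativedelta(years=1)
        print(service, when.strftime("%d/%m/%Y"))
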

uk_bin_collection/uk_bin_collection/councils/HinckleyandBosworthBoroughCouncil.py

Lines changed: 7 additions & 1 deletion
@@ -20,10 +20,16 @@ def parse_data(self, page: str, **kwargs) -> dict:
         check_uprn(user_uprn)
         bindata = {"bins": []}

+        headers = {
+            "Origin": "https://www.hinckley-bosworth.gov.uk",
+            "Referer": "https://www.hinckley-bosworth.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
         URI = f"https://www.hinckley-bosworth.gov.uk/set-location?id={user_uprn}&redirect=refuse&rememberloc="

         # Make the GET request
-        response = requests.get(URI)
+        response = requests.get(URI, headers=headers)

         # Parse the HTML
         soup = BeautifulSoup(response.content, "html.parser")
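Hinckley & Bosworth, Lichfield and North East Lincolnshire all gain the same browser-like header trio in this commit, presumably so the councils' request filtering stops rejecting bare requests. A rough illustration of the shared pattern (the helper and the UPRN below are hypothetical; only the header names/values and the Hinckley URL are taken from the diff):

import requests

def council_get(url, site_root):
    # Hypothetical helper, not part of the repository: the Origin/Referer/
    # User-Agent trio added in this commit, applied to a single GET.
    headers = {
        "Origin": site_root,
        "Referer": site_root,
        "User-Agent": "Mozilla/5.0",
    }
    return requests.get(url, headers=headers, timeout=30)

uprn = "000000000000"  # illustrative placeholder UPRN
resp = council_get(
    f"https://www.hinckley-bosworth.gov.uk/set-location?id={uprn}&redirect=refuse&rememberloc=",
    "https://www.hinckley-bosworth.gov.uk",
)
print(resp.status_code)
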

uk_bin_collection/uk_bin_collection/councils/IpswichBoroughCouncil.py

Lines changed: 3 additions & 1 deletion
@@ -31,7 +31,9 @@ class CouncilClass(AbstractGetBinDataClass):
     IBC_ENDPOINT = "https://app.ipswich.gov.uk/bin-collection/"

     def transform_date(self, date_str):
-        date_str = re.sub(r"(st|nd|rd|th)", "", date_str)  # Remove ordinal suffixes
+        date_str = re.sub(
+            r"(\d{1,2})(st|nd|rd|th)", r"\1", date_str
+        )  # Remove ordinal suffixes
         date_obj = datetime.strptime(date_str, "%A %d %B %Y")
         return date_obj.strftime(date_format)

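The old pattern removed every "st"/"nd"/"rd"/"th" substring, which corrupts month and weekday names ("August" becomes "Augu", "Saturday" becomes "Satuay") and makes the subsequent strptime call fail; the new pattern only drops an ordinal that directly follows the day number. A quick before/after illustration:

import re

date_str = "Saturday 1st August 2025"

# Old behaviour: strips the suffix wherever it appears, mangling names.
print(re.sub(r"(st|nd|rd|th)", "", date_str))              # Satuay 1 Augu 2025
# New behaviour: only the ordinal attached to the day number is removed.
print(re.sub(r"(\d{1,2})(st|nd|rd|th)", r"\1", date_str))  # Saturday 1 August 2025
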
uk_bin_collection/uk_bin_collection/councils/LichfieldDistrictCouncil.py

Lines changed: 7 additions & 1 deletion
@@ -24,10 +24,16 @@ def parse_data(self, page: str, **kwargs) -> dict:
         def solve(s):
             return re.sub(r"(\d)(st|nd|rd|th)", r"\1", s)

+        headers = {
+            "Origin": "https://www.lichfielddc.gov.uk",
+            "Referer": "https://www.lichfielddc.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
         URI = f"https://www.lichfielddc.gov.uk/homepage/6/bin-collection-dates?uprn={user_uprn}"

         # Make the GET request
-        response = requests.get(URI)
+        response = requests.get(URI, headers=headers)

         soup = BeautifulSoup(response.text, "html.parser")

uk_bin_collection/uk_bin_collection/councils/NorthEastLincs.py

Lines changed: 17 additions & 6 deletions
@@ -1,5 +1,7 @@
 import pandas as pd
+import requests
 from bs4 import BeautifulSoup
+
 from uk_bin_collection.uk_bin_collection.common import date_format
 from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

@@ -12,15 +14,26 @@ class CouncilClass(AbstractGetBinDataClass):
     """

     def parse_data(self, page: str, **kwargs) -> dict:
-        # Make a BS4 object
-        soup = BeautifulSoup(page.text, features="html.parser")
+        user_url = kwargs.get("url")
+
+        headers = {
+            "Origin": "https://www.nelincs.gov.uk",
+            "Referer": "https://www.nelincs.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
+        # Make the GET request
+        response = requests.get(user_url, headers=headers)
+
+        # Parse the HTML
+        soup = BeautifulSoup(response.content, "html.parser")
         soup.prettify()

         data = {"bins": []}

         # Get list items that can be seen on page
         for element in soup.find_all(
-            "li", {"class": "list-group-item p-0 p-3 bin-collection-item"}
+            "li", {"class": "border-0 list-group-item p-3 bg-light rounded p-2"}
         ):
             element_text = element.text.strip().split("\n\n")
             element_text = [x.strip() for x in element_text]
@@ -35,9 +48,7 @@ def parse_data(self, page: str, **kwargs) -> dict:
             data["bins"].append(dict_data)

         # Get hidden list items too
-        for element in soup.find_all(
-            "li", {"class": "list-group-item p-0 p-3 bin-collection-item d-none"}
-        ):
+        for element in soup.find_all("li", {"class": "border-0 list-group-item p-3"}):
             element_text = element.text.strip().split("\n\n")
             element_text = [x.strip() for x in element_text]

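The North East Lincs parser now fetches the page itself and matches the site's new Bootstrap class strings. Worth noting: when the class filter contains spaces, BeautifulSoup matches it against the element's full class attribute, so the two loops select different sets of items. A small illustration with made-up markup (the real nelincs.gov.uk page structure is not shown in the diff, only the class strings):

from bs4 import BeautifulSoup

html = """
<ul>
  <li class="border-0 list-group-item p-3 bg-light rounded p-2">Visible item</li>
  <li class="border-0 list-group-item p-3">Hidden item</li>
</ul>
"""
soup = BeautifulSoup(html, "html.parser")

visible = soup.find_all(
    "li", {"class": "border-0 list-group-item p-3 bg-light rounded p-2"}
)
hidden = soup.find_all("li", {"class": "border-0 list-group-item p-3"})
print([li.text for li in visible])  # ['Visible item']
print([li.text for li in hidden])   # ['Hidden item']
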
Lines changed: 67 additions & 66 deletions
@@ -1,5 +1,3 @@
-import time
-
 import requests
 from bs4 import BeautifulSoup

@@ -17,76 +15,79 @@ class CouncilClass(AbstractGetBinDataClass):

     def parse_data(self, page: str, **kwargs) -> dict:

-        user_uprn = kwargs.get("uprn")
-        check_uprn(user_uprn)
+        user_postcode = kwargs.get("postcode")
+        user_paon = kwargs.get("paon")
+        check_postcode(user_postcode)
+        check_paon(user_paon)
         bindata = {"bins": []}

-        API_URL = "https://maps.norwich.gov.uk/arcgis/rest/services/MyNorwich/PropertyDetails/FeatureServer/2/query"
-
-        params = {
-            "f": "json",
-            "where": f"UPRN='{user_uprn}' or UPRN='0{user_uprn}'",
-            "returnGeometry": "true",
-            "spatialRel": "esriSpatialRelIntersects",
-            "geometryType": "esriGeometryPolygon",
-            "inSR": "4326",
-            "outFields": "*",
-            "outSR": "4326",
-            "resultRecordCount": "1000",
+        URI = "https://bnr-wrp.whitespacews.com/"
+
+        session = requests.Session()
+
+        # get link from first page as has some kind of unique hash
+        r = session.get(
+            URI,
+        )
+        r.raise_for_status()
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        alink = soup.find("a", text="View my collections")
+
+        if alink is None:
+            raise Exception("Initial page did not load correctly")
+
+        # greplace 'seq' query string to skip next step
+        nextpageurl = alink["href"].replace("seq=1", "seq=2")
+
+        data = {
+            "address_name_number": user_paon,
+            "address_postcode": user_postcode,
         }

-        r = requests.get(API_URL, params=params)
-
-        data = r.json()
-        data = data["features"][0]["attributes"]["WasteCollectionHtml"]
-        soup = BeautifulSoup(data, "html.parser")
-
-        alternateCheck = soup.find("p")
-        if alternateCheck.text.__contains__("alternate"):
-            alternateCheck = True
-        else:
-            alternateCheck = False
-
-        strong = soup.find_all("strong")
-        collections = []
-
-        if alternateCheck:
-            bin_types = strong[2].text.strip().replace(".", "").split(" and ")
-            for bin in bin_types:
-                collections.append(
-                    (
-                        bin.capitalize(),
-                        datetime.strptime(strong[1].text.strip(), date_format),
-                    )
-                )
-
-        else:
-            p_tag = soup.find_all("p")
-            i = 1
-            for p in p_tag:
-                bin_types = (
-                    p.text.split("Your ")[1].split(" is collected")[0].split(" and ")
-                )
-                for bin in bin_types:
-                    collections.append(
-                        (
-                            bin.capitalize(),
-                            datetime.strptime(strong[1].text.strip(), date_format),
-                        )
-                    )
-                i += 2
-
-        if len(strong) > 3:
-            collections.append(
-                ("Garden", datetime.strptime(strong[4].text.strip(), date_format))
-            )
-
-        ordered_data = sorted(collections, key=lambda x: x[1])
-        for item in ordered_data:
+        # get list of addresses
+        r = session.post(nextpageurl, data)
+        r.raise_for_status()
+
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        # get first address (if you don't enter enough argument values this won't find the right address)
+        alink = soup.find("div", id="property_list").find("a")
+
+        if alink is None:
+            raise Exception("Address not found")
+
+        nextpageurl = URI + alink["href"]
+
+        # get collection page
+        r = session.get(
+            nextpageurl,
+        )
+        r.raise_for_status()
+        soup = BeautifulSoup(r.text, features="html.parser")
+
+        if soup.find("span", id="waste-hint"):
+            raise Exception("No scheduled services at this address")
+
+        u1s = soup.find("section", id="scheduled-collections").find_all("u1")
+
+        for u1 in u1s:
+            lis = u1.find_all("li", recursive=False)
+
+            date = lis[1].text.replace("\n", "")
+            bin_type = lis[2].text.replace("\n", "")
+
             dict_data = {
-                "type": item[0] + " bin",
-                "collectionDate": item[1].strftime(date_format),
+                "type": bin_type,
+                "collectionDate": datetime.strptime(
+                    date,
+                    "%d/%m/%Y",
+                ).strftime(date_format),
             }
             bindata["bins"].append(dict_data)

+        bindata["bins"].sort(
+            key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
+        )
+
         return bindata
