Skip to content

Commit 41f6f3d

Browse files
authored
Merge pull request #1556 from tsharp42/AddDarlingtonBoroughCouncil
2 parents 0edc373 + 960e2d6 commit 41f6f3d

File tree

2 files changed

+75
-0
lines changed

2 files changed

+75
-0
lines changed

uk_bin_collection/tests/input.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -641,6 +641,13 @@
641641
"wiki_note": "Pass the house number and postcode in their respective parameters. This parser requires a Selenium webdriver.",
642642
"LAD24CD": "E07000096"
643643
},
644+
"DarlingtonBoroughCouncil": {
645+
"uprn": "10003076924",
646+
"url": "https://www.darlington.gov.uk/bins-waste-and-recycling/collection-day-lookup/",
647+
"wiki_name": "Darlington Borough Council",
648+
"wiki_note": "Use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN.",
649+
"LAD24CD": "E06000005"
650+
},
644651
"DartfordBoroughCouncil": {
645652
"uprn": "100060861698",
646653
"url": "https://www.dartford.gov.uk/waste-recycling/collection-day",
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import re
2+
3+
from bs4 import BeautifulSoup
4+
5+
from uk_bin_collection.uk_bin_collection.common import *
6+
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
7+
8+
9+
# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
    """
    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Fetch and parse next bin collection dates for Darlington Borough Council.

        Args:
            page: Unused here; data is fetched directly from the council's
                AJAX collection-day lookup endpoint.
            **kwargs: Must contain ``uprn`` — the property's UPRN string.

        Returns:
            dict: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
            with dates formatted via the shared ``date_format``.

        Raises:
            requests.HTTPError: If the lookup endpoint returns an error status.
        """
        data = {"bins": []}

        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)

        url = f"https://www.darlington.gov.uk/bins-waste-and-recycling/collection-day-lookup/?uprn={user_uprn}"

        # The endpoint expects an AJAX-style request: Referer and
        # X-Requested-With mimic the in-page lookup widget.
        headers = {
            "Accept": "*/*",
            "Accept-Encoding": "gzip, deflate, br, zstd",
            "Accept-Language": "en-GB,en;q=0.5",
            "Referer": "https://www.darlington.gov.uk/bins-waste-and-recycling/collection-day-lookup/",
            # BUGFIX: was misspelled "Sec-Detch-Dest", so the fetch-metadata
            # header was never actually sent with its intended name.
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.186 Safari/537.36",
            "X-Requested-With": "XMLHttpRequest",
        }

        # Fetch and parse. Named `response` (not rebinding `page`) to avoid
        # shadowing the parameter; fail fast on HTTP errors instead of
        # silently parsing an error page.
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, features="html.parser")

        # Each ".refuse-results" card holds one bin type (header) and its
        # next collection date (footer).
        for card in soup.select("#detailsDisplay .refuse-results"):
            bin_date_tag = card.select_one(".card-footer h3")
            bin_type_tag = card.select_one(".card-header h2")
            # Skip malformed cards rather than crash on a missing tag
            # (original guarded only the date tag, not the type tag).
            if not bin_date_tag or not bin_type_tag:
                continue

            bin_type = bin_type_tag.text.strip()
            bin_date = bin_date_tag.text.strip()

            # Remove any extra text from the date, e.g. "(Today)", "(Tomorrow)"
            cleaned_bin_date = re.sub(r"\s*\(.*?\)", "", bin_date).strip()

            data["bins"].append(
                {
                    "type": bin_type,
                    "collectionDate": datetime.strptime(
                        cleaned_bin_date, "%A %d %B %Y"
                    ).strftime(date_format),
                }
            )

        return data

0 commit comments

Comments
 (0)