import pandas as pd
+import requests
from bs4 import BeautifulSoup
+
from uk_bin_collection.uk_bin_collection.common import date_format
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
@@ -12,15 +14,26 @@ class CouncilClass(AbstractGetBinDataClass):
    """

    def parse_data(self, page: str, **kwargs) -> dict:
-        # Make a BS4 object
-        soup = BeautifulSoup(page.text, features="html.parser")
+        user_url = kwargs.get("url")
+
+        headers = {
+            "Origin": "https://www.nelincs.gov.uk",
+            "Referer": "https://www.nelincs.gov.uk",
+            "User-Agent": "Mozilla/5.0",
+        }
+
+        # Make the GET request
+        response = requests.get(user_url, headers=headers)
+
+        # Parse the HTML
+        soup = BeautifulSoup(response.content, "html.parser")
        soup.prettify()

        data = {"bins": []}

        # Get list items that can be seen on page
        for element in soup.find_all(
-            "li", {"class": "list-group-item p-0 p-3 bin-collection-item"}
+            "li", {"class": "border-0 list-group-item p-3 bg-light rounded p-2"}
        ):
            element_text = element.text.strip().split("\n\n")
            element_text = [x.strip() for x in element_text]
@@ -35,9 +48,7 @@ def parse_data(self, page: str, **kwargs) -> dict:
            data["bins"].append(dict_data)

        # Get hidden list items too
-        for element in soup.find_all(
-            "li", {"class": "list-group-item p-0 p-3 bin-collection-item d-none"}
-        ):
+        for element in soup.find_all("li", {"class": "border-0 list-group-item p-3"}):
            element_text = element.text.strip().split("\n\n")
            element_text = [x.strip() for x in element_text]
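For reference outside the diff, a minimal standalone sketch of the new fetch-and-parse flow. The EXAMPLE_URL value is a hypothetical placeholder (in the scraper itself the address-specific URL arrives via kwargs["url"]); the headers and the selector are the ones used in the patch above.

import requests
from bs4 import BeautifulSoup

# Hypothetical placeholder: substitute the address-specific collection page URL
# that the scraper normally receives via kwargs["url"].
EXAMPLE_URL = "https://www.nelincs.gov.uk/..."

headers = {
    "Origin": "https://www.nelincs.gov.uk",
    "Referer": "https://www.nelincs.gov.uk",
    "User-Agent": "Mozilla/5.0",
}

# Fetch the page directly rather than relying on the pre-fetched `page` argument
response = requests.get(EXAMPLE_URL, headers=headers)
soup = BeautifulSoup(response.content, "html.parser")

# The patch matches visible items on the longer class string and the remaining
# (hidden) items on the shorter one; this loop uses the shorter selector.
for element in soup.find_all("li", {"class": "border-0 list-group-item p-3"}):
    parts = [x.strip() for x in element.text.strip().split("\n\n")]
    print(parts)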