-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathget_rioolwater.py
More file actions
119 lines (85 loc) · 3.99 KB
/
get_rioolwater.py
File metadata and controls
119 lines (85 loc) · 3.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import json
import platform
from datetime import datetime, timezone

import bs4
import pandas as pd
import requests
import streamlit as st
def get_normal_date(unixdate):
    """Convert a Unix timestamp into a readable calendar date (UTC).

    https://stackoverflow.com/questions/3682748/converting-unix-timestamp-string-to-readable-date

    Args:
        unixdate (str | int): seconds since the Unix epoch; any value that
            ``int()`` accepts.

    Returns:
        str: the date in yyyy-mm-dd format, evaluated in UTC.

    Raises:
        ValueError: if ``unixdate`` is a string that is not a valid integer.
    """
    ts = int(unixdate)
    # If you encounter a "year is out of range" error the timestamp
    # may be in milliseconds; try `ts /= 1000` in that case.
    #
    # datetime.utcfromtimestamp() is deprecated since Python 3.12, so the
    # conversion is done with a timezone-aware datetime pinned to UTC.
    # The formatted date is identical to what utcfromtimestamp() produced.
    return datetime.fromtimestamp(ts, tz=timezone.utc).strftime("%Y-%m-%d")
def load_data_from_csv():
    """Load the official RIVM sewage-water values from CSV.

    When ``platform.processor()`` reports a non-empty string the hard-coded
    local copy is tried first. If that file does not exist, or when the
    processor string is empty, the copy in the GitHub repository is read.

    Returns:
        pandas.DataFrame: the CSV contents as parsed by ``pandas.read_csv``.
    """
    local_file = r"C:\Users\rcxsm\Documents\python_scripts\covid19_seir_models\COVIDcases\input\rioolwaardes_official_rivm.csv"
    remote_file = r"https://raw.githubusercontent.com/rcsmit/COVIDcases/main/input/rioolwaardes_official_rivm.csv"
    read_options = {"delimiter": ",", "low_memory": False}

    # NOTE(review): a non-empty processor string is used as the signal for
    # "running on a local machine" - presumably it is empty on the hosted
    # environment; confirm.
    if platform.processor() != "":
        try:
            return pd.read_csv(local_file, **read_options)
        except FileNotFoundError:
            # The local path only exists on the original author's machine;
            # everywhere else fall through to the online copy instead of
            # crashing.
            pass

    return pd.read_csv(remote_file, **read_options)
def save_df(df, name):
    """Write a dataframe to ``<name>.csv`` and report it on stdout.

    Args:
        df (pandas.DataFrame): the data to store; the index is not written.
        name (str): target path without the ``.csv`` extension.
    """
    target = "".join((name, ".csv"))
    # method=None means the file is written uncompressed.
    csv_compression = {"method": None, "archive_name": target}
    df.to_csv(target, index=False, compression=csv_compression)
    print(f"--- Saving {target} ---")
def scrape_data_from_site():
    """Scrape the national sewage-water values from the corona dashboard page.

    The page carries its data as JSON inside the
    ``<script id="__NEXT_DATA__">`` tag. The complete JSON is also written,
    pretty-printed, to ``output_rioolwater.txt`` in the working directory.

    Returns:
        pandas.DataFrame: one row per entry of the ``sewer`` values list,
        with the columns ``date_unix``, ``value_rivm_official`` (the entry's
        ``average``) and ``date_rivm`` (``date_unix`` as yyyy-mm-dd).

    Raises:
        requests.RequestException: on a network failure, a timeout or an
            HTTP error status.
        ValueError: if the page contains no ``__NEXT_DATA__`` script tag.
        KeyError: if the JSON does not have the expected structure.
    """
    url = "https://coronadashboard.rijksoverheid.nl/landelijk/rioolwater"
    # Without an explicit timeout requests waits indefinitely on a server
    # that accepts the connection but never answers.
    res = requests.get(url, timeout=30)
    # Stop on 4xx/5xx rather than trying to parse an error page.
    res.raise_for_status()

    soup = bs4.BeautifulSoup(res.content, features="lxml")
    script_tag = soup.select_one('script[id="__NEXT_DATA__"]')
    if script_tag is None:
        raise ValueError(f"No __NEXT_DATA__ script tag found at {url}")
    jsondata = json.loads(script_tag.text)

    # Save the JSON in a readable way for inspection.
    with open('output_rioolwater.txt', 'w') as f:
        f.write(json.dumps(jsondata, indent=4))

    # TODO: see if using named tuples is better https://www.youtube.com/watch?v=BlVciXgsBYI
    columns = ["date_unix", "value_rivm_official", "date_rivm"]
    values = jsondata["props"]["pageProps"]["selectedNlData"]["sewer"]["values"]
    rows = [
        [entry["date_unix"], entry["average"], get_normal_date(entry["date_unix"])]
        for entry in values
    ]
    return pd.DataFrame(rows, columns=columns)
def make_grouped_df(total_df):
    """Add week and moving-average columns, then average everything per ISO week.

    The input frame is modified in place: ``date_rivm`` is converted to
    datetime and the columns ``year_number``, ``week_number``, ``weeknr``
    (``<ISO year>_<two-digit ISO week>``) and ``value_rivm_official_sma``
    (5-row trailing mean, rounded to one decimal) are added.

    Args:
        total_df (pandas.DataFrame): needs the columns ``date_rivm``
            (yyyy-mm-dd) and ``value_rivm_official``.

    Returns:
        pandas.DataFrame: one row per ``weeknr``, sorted, holding the mean of
        every numeric column.
    """
    total_df["date_rivm"] = pd.to_datetime(total_df["date_rivm"], format="%Y-%m-%d")

    # ISO calendar parts; the ISO year can differ from the calendar year
    # around New Year.
    iso_parts = total_df["date_rivm"].dt.isocalendar()
    total_df["year_number"] = iso_parts["year"]
    total_df["week_number"] = iso_parts["week"]

    year_text = total_df["year_number"].astype(str)
    week_text = total_df["week_number"].astype(str).str.zfill(2)
    total_df["weeknr"] = year_text + "_" + week_text

    trailing_window = total_df["value_rivm_official"].rolling(window=5, center=False)
    total_df["value_rivm_official_sma"] = trailing_window.mean().round(1)

    # numeric_only belongs to mean(), not to groupby().
    per_week = total_df.groupby("weeknr", sort=True)
    weekly_means = per_week.mean(numeric_only=True)
    return weekly_means.reset_index()
def scrape_rioolwater():
    """Return the sewage-water measurements and their per-week averages.

    Originally meant to scrape the sewage-water data from the RIVM site,
    where it is wrapped in a piece of JavaScript containing JSON. That live
    scrape (scrape_data_from_site) is currently switched off; the data is
    read with load_data_from_csv instead.

    Returns:
        tuple: ``(total_df, df_grouped)`` - the loaded frame (extended in
        place by make_grouped_df) and the frame aggregated per week.
    """
    # Live scraping is disabled; the CSV copy is the only data source.
    measurements = load_data_from_csv()
    weekly = make_grouped_df(measurements)
    return measurements, weekly
def main():
    """Load the sewage-water data and save the weekly averages as
    ``rioolwater_per_week.csv``."""
    # Only the aggregated frame is stored; the per-measurement frame is unused.
    _, weekly = scrape_rioolwater()
    save_df(weekly, "rioolwater_per_week")
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
""""Toen Delta kwam, zagen we zowel in het rioolwater als bij de positieve testen een duidelijke stijging.
Maar opvallend genoeg zagen we bij Omikron (BA.1) geen stijging in het rioolwater terwijl
het aantal positieve testen flink steeg". Het verband tussen het rioolwater en het aantal
positieve testen was bij BA.1 "helemaal weggevallen". "Met de opkomst van Omikron BA.2
was het verband tussen rioolwater en positieve testen weer terug."
https://www.corona-lokaal.nl/locatie/Nederland/waterzuivering/Nederland
"""