Skip to content

Commit f1ba5bc

Browse files
committed
Move sample data and add to __init__.py files
1 parent 9d361df commit f1ba5bc

File tree

8 files changed

+569
-122
lines changed

8 files changed

+569
-122
lines changed

src/bivapp/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from .bivapp import (
2+
BivariatePlotRaw,
3+
BivariatePlotGrid,
4+
BivariatePlotRawGAM,
5+
)

src/bivapp/bivapp.py

Lines changed: 0 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -20,125 +20,9 @@ def to_array(self):
2020

2121
from pygam import GAM, te # pyGAM maintenance lapsed. Waiting for update.
2222

23-
parent_dir = os.path.dirname(os.getcwd())
2423
np.random.seed(117)
2524

2625

27-
def ImportOpenairDataExample():
28-
"""
29-
Example data from the R openair library.
30-
See https://www.rdocumentation.org/packages/openair/versions/2.18-2/topics/mydata.
31-
Gaseous pollutants are all in ppbv, except CO which is in ppmv.
32-
Particle-phase pollutants are in ug/m3.
33-
Wind speeds are in m/s.
34-
Wind directions are in degrees, with 0/360 being winds from the north.
35-
"""
36-
df = pd.read_csv(parent_dir + "/test_data/openair_test_data.csv")
37-
df = df.drop(columns=["Unnamed: 0"])
38-
df["date"] = pd.to_datetime(df["date"])
39-
df = df.sort_index()
40-
41-
# Add some jitter to wind directions
42-
df["wd"] = df["wd"] + np.random.uniform(-10, 10, len(df.index))
43-
df["wd"] %= 360
44-
45-
# Add some jitter to wind speeds
46-
df["ws"] = df["ws"] + np.random.uniform(-0.1, 0.1, len(df.index))
47-
df.loc[df["ws"] < 0, "ws"] = 0
48-
49-
return df
50-
51-
52-
def ImportTorontoNO2():
53-
"""Imports test pollution data. The data is from 125 Resources Rd., Toronto, 2023-01-01 to 2023-12-31."""
54-
# Import and rename columns
55-
pol_df = pd.read_csv(parent_dir + "/test_data/NO2_2023.csv", skiprows=7)
56-
pol_df.columns = [
57-
"pollutant",
58-
"naps_id",
59-
"city",
60-
"province",
61-
"lat",
62-
"lon",
63-
"date",
64-
] + [i for i in range(1, 25)]
65-
66-
# Choose only Toronto 125 Resources Rd. station
67-
pol_df = pol_df.loc[pol_df["naps_id"] == 60430, :]
68-
69-
# Wide to long
70-
pol_df = pd.melt(
71-
pol_df,
72-
id_vars=["date"],
73-
value_vars=[i for i in range(1, 25)],
74-
var_name="hour",
75-
value_name="no2_ppbv",
76-
)
77-
78-
# Form and set datetime index
79-
pol_df["datetime"] = pd.to_datetime(
80-
pol_df["date"] + " " + (pol_df["hour"] - 1).astype(str) + ":00:00"
81-
) + pd.Timedelta("1h")
82-
pol_df = pol_df.drop(columns=["date", "hour"])
83-
pol_df = pol_df.set_index("datetime")
84-
pol_df = pol_df.sort_index()
85-
86-
# Set missing values to np.nan
87-
pol_df["no2_ppbv"] = pol_df["no2_ppbv"].replace(-999, np.nan)
88-
89-
return pol_df
90-
91-
92-
def ImportTorontoMet():
93-
"""Imports test meteorology. The data is from Toronto's Pearson Airport, 2023-01-01 to 2023-12-31."""
94-
# Import and rename columns
95-
met_df = pd.read_csv(parent_dir + "/test_data/climate-hourly.csv")
96-
met_df = met_df[["UTC_DATE", "WIND_DIRECTION", "WIND_SPEED"]]
97-
met_df.columns = ["datetime", "wind_dir", "wind_speed"]
98-
99-
# Convert from UTC to EST (pol data is in "local standard time")
100-
met_df["datetime"] = pd.to_datetime(met_df["datetime"])
101-
met_df["datetime"] = met_df["datetime"].dt.tz_localize("UTC").dt.tz_convert("EST")
102-
met_df["datetime"] = met_df["datetime"].dt.tz_localize(None)
103-
met_df = met_df.set_index("datetime")
104-
met_df = met_df.sort_index()
105-
106-
# Drop the first row (last hour of prior year)
107-
met_df = met_df.iloc[1:, :]
108-
109-
# Convert wind direction from tens of degrees to degrees
110-
met_df["wind_dir_deg"] = met_df["wind_dir"] * 10
111-
met_df = met_df.drop(columns=["wind_dir"])
112-
met_df["wind_dir_deg"] = met_df["wind_dir_deg"].replace(
113-
0, np.nan
114-
) # zeros are calms
115-
116-
# Add some jitter to wind direction
117-
met_df["wind_dir_deg"] = met_df["wind_dir_deg"] + np.random.uniform(
118-
-10, 10, len(met_df.index)
119-
)
120-
met_df["wind_dir_deg"] %= 360
121-
122-
# Add some jitter to wind speed and convert from km/h to m/s
123-
met_df["wind_speed"] = met_df["wind_speed"] + np.random.uniform(
124-
-0.5, 0.5, len(met_df.index)
125-
)
126-
met_df.loc[met_df["wind_speed"] < 0, "wind_speed"] = (
127-
0 # the jittering could introduce negatives
128-
)
129-
met_df["wind_speed_m_s"] = met_df["wind_speed"] / 3.6
130-
met_df = met_df.drop(columns=["wind_speed"])
131-
132-
return met_df
133-
134-
135-
def LoadTorontoDataExample():
136-
pol_df = ImportTorontoNO2()
137-
met_df = ImportTorontoMet()
138-
df = pol_df.join(met_df)
139-
return df
140-
141-
14226
def _makeScatterPlot(xs, ys, zs, vmin, vmax, cmap, colourbar_label, scatter_kwds):
14327
# Create and return the plot
14428
fig, ax = plt.subplots(1, 1, figsize=(8, 8), layout="constrained")

src/sample_data/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from ._base import (
2+
ImportOpenairDataExample,
3+
ImportTorontoNO2,
4+
ImportTorontoMet,
5+
LoadTorontoDataExample,
6+
)

src/sample_data/_base.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import pandas as pd, numpy as np
2+
3+
def ImportOpenairDataExample(data_dir=None):
    """
    Load the example data set from the R openair library.

    See https://www.rdocumentation.org/packages/openair/versions/2.18-2/topics/mydata.
    Gaseous pollutants are all in ppbv, except CO which is in ppmv.
    Particle-phase pollutants are in ug/m3.
    Wind speeds are in m/s.
    Wind directions are in degrees, with 0/360 being winds from the north.

    Parameters
    ----------
    data_dir : str or path-like, optional
        Directory containing ``openair_test_data.csv``. Defaults to the
        ``data`` directory next to this module.

    Returns
    -------
    pandas.DataFrame
        The CSV contents without the ``"Unnamed: 0"`` column, with ``date``
        parsed to datetimes and with jitter added to ``wd`` and ``ws``.

    Notes
    -----
    The jitter is drawn from NumPy's global random state, so the values
    differ between calls unless the caller seeds it first.
    """
    from pathlib import Path  # local import keeps this function self-contained

    if data_dir is None:
        # Resolve relative to this module rather than the filesystem root or
        # the current working directory.
        # NOTE(review): assumes the CSVs live in a `data/` folder beside this
        # module -- confirm against the files renamed in this commit.
        data_dir = Path(__file__).resolve().parent / "data"

    df = pd.read_csv(Path(data_dir) / "openair_test_data.csv")
    df = df.drop(columns=["Unnamed: 0"])
    df["date"] = pd.to_datetime(df["date"])
    # `date` stays a regular column; this sorts on the default row index.
    df = df.sort_index()

    n_rows = len(df.index)

    # Add some jitter to wind directions, wrapping back into [0, 360).
    df["wd"] = (df["wd"] + np.random.uniform(-10, 10, n_rows)) % 360

    # Add some jitter to wind speeds; the jitter can push values below zero,
    # so clamp them back to zero.
    df["ws"] = (df["ws"] + np.random.uniform(-0.1, 0.1, n_rows)).clip(lower=0)

    return df
26+
27+
28+
def ImportTorontoNO2(data_dir=None):
    """
    Import test pollution data.

    The data is from 125 Resources Rd., Toronto, 2023-01-01 to 2023-12-31.

    Parameters
    ----------
    data_dir : str or path-like, optional
        Directory containing ``NO2_2023.csv``. Defaults to the ``data``
        directory next to this module.

    Returns
    -------
    pandas.DataFrame
        A single ``no2_ppbv`` column indexed by ``datetime`` in ascending
        order. Values of -999 in the file are returned as NaN.
    """
    from pathlib import Path  # local import keeps this function self-contained

    if data_dir is None:
        # Resolve relative to this module rather than the filesystem root or
        # the current working directory.
        # NOTE(review): assumes the CSVs live in a `data/` folder beside this
        # module -- confirm against the files renamed in this commit.
        data_dir = Path(__file__).resolve().parent / "data"

    hour_columns = list(range(1, 25))

    # Import and rename columns: seven station/date fields followed by one
    # column per hour of the day (1-24).
    pol_df = pd.read_csv(Path(data_dir) / "NO2_2023.csv", skiprows=7)
    pol_df.columns = [
        "pollutant",
        "naps_id",
        "city",
        "province",
        "lat",
        "lon",
        "date",
    ] + hour_columns

    # Choose only Toronto 125 Resources Rd. station
    pol_df = pol_df.loc[pol_df["naps_id"] == 60430, :]

    # Wide to long
    pol_df = pd.melt(
        pol_df,
        id_vars=["date"],
        value_vars=hour_columns,
        var_name="hour",
        value_name="no2_ppbv",
    )

    # Form and set datetime index. Hour column h is stamped at date + h hours,
    # so hour 24 lands on 00:00 of the following day.
    pol_df["datetime"] = pd.to_datetime(pol_df["date"]) + pd.to_timedelta(
        pol_df["hour"].astype(int), unit="h"
    )
    pol_df = pol_df.drop(columns=["date", "hour"])
    pol_df = pol_df.set_index("datetime").sort_index()

    # Set missing values to np.nan
    pol_df["no2_ppbv"] = pol_df["no2_ppbv"].replace(-999, np.nan)

    return pol_df
66+
67+
68+
def ImportTorontoMet(data_dir=None):
    """
    Import test meteorology.

    The data is from Toronto's Pearson Airport, 2023-01-01 to 2023-12-31.

    Parameters
    ----------
    data_dir : str or path-like, optional
        Directory containing ``climate-hourly.csv``. Defaults to the ``data``
        directory next to this module.

    Returns
    -------
    pandas.DataFrame
        Indexed by timezone-naive ``datetime`` (converted from UTC to EST),
        with columns ``wind_dir_deg`` (degrees, NaN where the file has 0)
        and ``wind_speed_m_s`` (m/s). Both carry random jitter.

    Notes
    -----
    The jitter is drawn from NumPy's global random state, so the values
    differ between calls unless the caller seeds it first.
    """
    from pathlib import Path  # local import keeps this function self-contained

    if data_dir is None:
        # Resolve relative to this module rather than the filesystem root or
        # the current working directory.
        # NOTE(review): assumes the CSVs live in a `data/` folder beside this
        # module -- confirm against the files renamed in this commit.
        data_dir = Path(__file__).resolve().parent / "data"

    # Import and rename columns. The copy makes the later column assignments
    # operate on an independent frame rather than a slice of the raw table.
    met_df = pd.read_csv(Path(data_dir) / "climate-hourly.csv")
    met_df = met_df[["UTC_DATE", "WIND_DIRECTION", "WIND_SPEED"]].copy()
    met_df.columns = ["datetime", "wind_dir", "wind_speed"]

    # Convert from UTC to EST (pol data is in "local standard time"), then
    # drop the timezone so the index is naive.
    met_df["datetime"] = (
        pd.to_datetime(met_df["datetime"])
        .dt.tz_localize("UTC")
        .dt.tz_convert("EST")
        .dt.tz_localize(None)
    )
    met_df = met_df.set_index("datetime").sort_index()

    # Drop the first row (last hour of prior year)
    met_df = met_df.iloc[1:, :].copy()

    n_rows = len(met_df.index)

    # Convert wind direction from tens of degrees to degrees; zeros are calms
    met_df["wind_dir_deg"] = (met_df["wind_dir"] * 10).replace(0, np.nan)
    met_df = met_df.drop(columns=["wind_dir"])

    # Add some jitter to wind direction, wrapping back into [0, 360)
    met_df["wind_dir_deg"] = (
        met_df["wind_dir_deg"] + np.random.uniform(-10, 10, n_rows)
    ) % 360

    # Add some jitter to wind speed and convert from km/h to m/s. The
    # jittering could introduce negatives, so clamp at zero before converting.
    jittered_speed = met_df["wind_speed"] + np.random.uniform(-0.5, 0.5, n_rows)
    met_df["wind_speed_m_s"] = jittered_speed.clip(lower=0) / 3.6
    met_df = met_df.drop(columns=["wind_speed"])

    return met_df
109+
110+
111+
def LoadTorontoDataExample():
    """
    Build the combined Toronto example data set.

    Loads the NO2 table first, then the meteorology table, and joins the
    meteorology columns onto the NO2 rows by index.
    """
    return ImportTorontoNO2().join(ImportTorontoMet())
File renamed without changes.

0 commit comments

Comments
 (0)