Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ authors:
given-names: "Radovan"
- family-names: "Darst"
given-names: "Richard"
- family-names: "Tian"
given-names: "Yu"
- family-names: "Juselius"
given-names: "Jonas"
- family-names: "Di Remigio Eikås"
Expand Down
79 changes: 57 additions & 22 deletions content/code/abstracting-plot.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,66 @@
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib.pyplot as plt

def plot(column, label, location, color, compute_mean):
    """Plot one column of the current month's data as a time series and save it.

    Reads the module-level names ``data_month`` (the data to plot) and
    ``month`` (used in the output file name), so both must be assigned
    before this function is called.

    Args:
        column: Name of the column in ``data_month`` to plot; also used in
            the output file name.
        label: Legend label of the series; also used as the y-axis label
            and in the figure title.
        location: Place name shown in the figure title.
        color: Color of the series and of the mean line.
        compute_mean: If true, also draw the arithmetic mean of the column
            as a horizontal dashed line.
    """
    fig, ax = plt.subplots()

    # time series
    ax.plot(
        data_month.index,
        data_month[column],
        label=label,
        color=color,
    )

    if compute_mean:
        values = data_month[column].values
        mean_value = sum(values) / len(values)

        # mean (as horizontal dashed line)
        ax.axhline(
            y=mean_value,
            label=f"mean {label}: {mean_value:.1f}",
            color=color,
            linestyle="--",
        )

    ax.set_title(f"{label} at {location}")
    ax.set_xlabel("date and time")
    ax.set_ylabel(label)
    ax.legend()
    ax.grid(True)

    # format x-axis for better date display
    fig.autofmt_xdate()

    fig.savefig(f"{month}-{column}.png")

    # the figure is not needed after saving; close it so that repeated
    # calls do not keep accumulating open figures
    plt.close(fig)


# read data
data = pd.read_csv("weather_data.csv")

# combine 'date' and 'time' into a single datetime column
data["datetime"] = pd.to_datetime(data["date"] + " " + data["time"])

# set datetime as index for convenience
data = data.set_index("datetime")


# one temperature figure and one precipitation figure per month;
# plot() reads the module-level names data_month and month assigned here
for month in ["2024-01", "2024-02", "2024-03"]:
    data_month = data.loc[month]

    plot(
        "air_temperature_celsius",
        "air temperature (C)",
        "Helsinki airport",
        "red",
        compute_mean=True,
    )
    plot(
        "precipitation_mm",
        "precipitation (mm)",
        "Helsinki airport",
        "blue",
        compute_mean=False,
    )
78 changes: 63 additions & 15 deletions content/code/add-iteration.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,69 @@
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib.pyplot as plt

# read data
data = pd.read_csv("weather_data.csv")

# combine 'date' and 'time' into a single datetime column
data["datetime"] = pd.to_datetime(data["date"] + " " + data["time"])

# set datetime as index for convenience
data = data.set_index("datetime")


# for every month: one temperature figure (with mean line) and one
# precipitation figure
for month in ["2024-01", "2024-02", "2024-03"]:
    data_month = data.loc[month]

    fig, ax = plt.subplots()

    # temperature time series
    ax.plot(
        data_month.index,
        data_month["air_temperature_celsius"],
        label="air temperature (C)",
        color="red",
    )

    values = data_month["air_temperature_celsius"].values
    mean_temp = sum(values) / len(values)

    # mean temperature (as horizontal dashed line)
    ax.axhline(
        y=mean_temp,
        label=f"mean air temperature (C): {mean_temp:.1f}",
        color="red",
        linestyle="--",
    )

    ax.set_title("air temperature (C) at Helsinki airport")
    ax.set_xlabel("date and time")
    ax.set_ylabel("air temperature (C)")
    ax.legend()
    ax.grid(True)

    # format x-axis for better date display
    fig.autofmt_xdate()

    fig.savefig(f"{month}-temperature.png")

    # close the saved figure so figures do not pile up across iterations
    plt.close(fig)

    fig, ax = plt.subplots()

    # precipitation time series
    ax.plot(
        data_month.index,
        data_month["precipitation_mm"],
        label="precipitation (mm)",
        color="blue",
    )

    ax.set_title("precipitation (mm) at Helsinki airport")
    ax.set_xlabel("date and time")
    ax.set_ylabel("precipitation (mm)")
    ax.legend()
    ax.grid(True)

    # format x-axis for better date display
    fig.autofmt_xdate()

    fig.savefig(f"{month}-precipitation.png")

    # close the saved figure so figures do not pile up across iterations
    plt.close(fig)
120 changes: 87 additions & 33 deletions content/code/cli.py
Original file line number Diff line number Diff line change
@@ -1,55 +1,109 @@
from pathlib import Path


import pandas as pd
from matplotlib import pyplot as plt
import matplotlib.pyplot as plt
import pytest
import click


def read_data(file_name):
    """Read weather data from a CSV file and index it by date and time.

    Args:
        file_name: Passed unchanged to ``pandas.read_csv``. The data must
            contain a ``date`` and a ``time`` column.

    Returns:
        The data frame with a ``datetime`` index built from the ``date``
        and ``time`` columns.
    """
    data = pd.read_csv(file_name)

    # combine 'date' and 'time' into a single datetime column
    data["datetime"] = pd.to_datetime(data["date"] + " " + data["time"])

    # set datetime as index for convenience
    data = data.set_index("datetime")

    return data

def arithmetic_mean(values):
    """Return the arithmetic mean of ``values``.

    Args:
        values: A sized collection of numbers; it is passed to both
            ``sum`` and ``len``.

    Returns:
        The sum of the values divided by their count.

    Raises:
        ZeroDivisionError: If ``values`` is empty (e.g. ``[]``).
    """
    return sum(values) / len(values)

def test_arithmetic_mean():
    """Check arithmetic_mean on a small list with a known mean."""
    result = arithmetic_mean([1.0, 2.0, 3.0, 4.0])
    # compare with a tolerance rather than exact float equality
    assert result == pytest.approx(2.5)


def plot(date_range, values, label, location, color, compute_mean, file_name):
    """Plot a time series, optionally with its mean, and save it to a file.

    Args:
        date_range: x-values of the series.
        values: y-values of the series.
        label: Legend label of the series; also used as the y-axis label
            and in the figure title.
        location: Place name shown in the figure title.
        color: Color of the series and of the mean line.
        compute_mean: If true, also draw the arithmetic mean of ``values``
            as a horizontal dashed line.
        file_name: Where the figure is written (passed to ``fig.savefig``).
    """
    fig, ax = plt.subplots()

    # time series
    ax.plot(
        date_range,
        values,
        label=label,
        color=color,
    )

    if compute_mean:
        mean_value = arithmetic_mean(values)

        # mean (as horizontal dashed line)
        ax.axhline(
            y=mean_value,
            label=f"mean {label}: {mean_value:.1f}",
            color=color,
            linestyle="--",
        )

    ax.set_title(f"{label} at {location}")
    ax.set_xlabel("date and time")
    ax.set_ylabel(label)
    ax.legend()
    ax.grid(True)

    # format x-axis for better date display
    fig.autofmt_xdate()

    fig.savefig(file_name)

    # the figure is not needed after saving; close it so that repeated
    # calls do not keep accumulating open figures
    plt.close(fig)


@click.command()
@click.option("--month", required=True, type=str, help="Which month (YYYY-MM)?")
@click.option(
    "--data-file",
    required=True,
    # must be an existing file, not a directory
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    help="Data is read from this file.",
)
@click.option(
    "--output-directory",
    required=True,
    # must be an existing directory, since file names are appended to it
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    help="Figures are written to this directory.",
)
def main(
    month,
    data_file,
    output_directory,
):
    """Plot air temperature and precipitation for one month of weather data."""
    data = read_data(data_file)

    # select the rows of the requested month from the datetime index
    data_month = data.loc[month]
    date_range = data_month.index

    plot(
        date_range,
        data_month["air_temperature_celsius"].values,
        "air temperature (C)",
        "Helsinki airport",
        "red",
        compute_mean=True,
        file_name=output_directory / f"{month}-temperature.png",
    )
    plot(
        date_range,
        data_month["precipitation_mm"].values,
        "precipitation (mm)",
        "Helsinki airport",
        "blue",
        compute_mean=False,
        file_name=output_directory / f"{month}-precipitation.png",
    )


Expand Down
47 changes: 47 additions & 0 deletions content/code/initial-version-with-mean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import pandas as pd
import matplotlib.pyplot as plt


# load the weather observations and index them by a 'datetime' column
# built from the 'date' and 'time' columns
data = pd.read_csv("weather_data.csv")
data = data.assign(
    datetime=pd.to_datetime(data["date"] + " " + data["time"])
).set_index("datetime")

# restrict to the rows selected by "2024-01"
january = data.loc["2024-01"]

# arithmetic mean of the temperature column
values = january["air_temperature_celsius"].values
mean_temp = sum(values) / len(values)

fig, ax = plt.subplots()

# temperature curve
ax.plot(
    january.index,
    january["air_temperature_celsius"],
    color="red",
    label="air temperature (C)",
)

# mean temperature drawn as a horizontal dashed line
ax.axhline(
    y=mean_temp,
    color="red",
    linestyle="--",
    label=f"mean air temperature (C): {mean_temp:.1f}",
)

# title, axis labels, legend and grid
ax.set_title("air temperature (C) at Helsinki airport")
ax.set_xlabel("date and time")
ax.set_ylabel("air temperature (C)")
ax.legend()
ax.grid(True)

# rotate/align the date tick labels so they stay readable
fig.autofmt_xdate()

fig.savefig("2024-01-temperature.png")
Loading
Loading