Skip to content

Commit 02d6339

Browse files
committed
[2025-02-04 20:58] Partial fix to issues with the combined Dataset for the new CDS API output.
Signed-off-by: Raoul Linnenbank <[email protected]>
1 parent 8fc5e7d commit 02d6339

File tree

10 files changed

+113
-64
lines changed

10 files changed

+113
-64
lines changed
Lines changed: 32 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,37 @@
1-
name: GitHub Pages Deployment
1+
name: Deployment to GitHub Pages
22
on:
3-
push:
4-
branches:
5-
main*
3+
push:
4+
branches:
5+
- main*
66
permissions:
7-
id-token: write
8-
pages: write
7+
id-token: write
8+
pages: write
99
jobs:
10-
# ---------------------------------------------------------------- #
11-
# | Building and deployment of Sphinx build | #
12-
# ---------------------------------------------------------------- #
13-
docs:
10+
# Build the Sphinx documentation
11+
build:
1412
runs-on: ubuntu-latest
1513
steps:
16-
- uses: actions/checkout@v3
17-
- uses: actions/setup-python@v3
18-
- name: Install Dependencies
19-
run: |
20-
pip install sphinx piccolo-theme myst_parser
21-
- name: Sphinx Build
22-
run: |
23-
sphinx-build ./sphinx-docs/ _build
24-
- name: Upload GitHub Pages artifact
25-
uses: actions/upload-pages-artifact@v3
26-
with:
27-
path: _build
28-
- name: Push artifact to pages
29-
uses: actions/[email protected]
14+
- uses: actions/checkout@v3
15+
- uses: actions/setup-python@v3
16+
- name: Install Dependencies
17+
run: |
18+
pip install sphinx piccolo-theme myst_parser
19+
- name: Sphinx Build
20+
run: |
21+
sphinx-build ./sphinx-docs/ build_outputs_folder
22+
- name: Upload GitHub Pages artifact
23+
uses: actions/upload-pages-artifact@v3
24+
id: deployment
25+
with:
26+
path: build_outputs_folder/
27+
# Deploy the Sphinx documentation to GitHub Pages
28+
deploy:
29+
environment:
30+
name: github-pages
31+
url: ${{ steps.deployment.outputs.page_url }}
32+
runs-on: ubuntu-latest
33+
needs: build
34+
steps:
35+
- name: Deploy to GitHub Pages
36+
id: deployment
37+
uses: actions/deploy-pages@v4

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "weather_provider_api"
3-
version = "2.63.0"
3+
version = "2.66.0"
44
description = "Weather Provider Libraries and API"
55
authors = ["Verbindingsteam", "Raoul Linnenbank <[email protected]>"]
66
license = "MPL-2.0"

weather_provider_api/routers/weather/controller.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ def get_weather(
4040
source_id: str,
4141
model_id: str,
4242
fetch_async: bool,
43-
coords: List[List[Tuple[float, float]]],
44-
begin: Optional[datetime.datetime] = None,
45-
end: Optional[datetime.datetime] = None,
46-
factors: List[str] = None,
43+
coords: list[list[tuple[float, float]]],
44+
begin: datetime.datetime | None = None,
45+
end: datetime.datetime | None = None,
46+
factors: list[str] | None = None,
4747
):
4848
"""Function to use the requested weather model from the requested source to get specific weather factors for a
4949
specific time and specific location(s)

weather_provider_api/routers/weather/repository/repository.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ def cleanup(self):
110110
self._delete_excess_files()
111111

112112
@abstractmethod
113-
def update(self):
113+
def update(self, test_mode: bool = False) -> RepositoryUpdateResult:
114114
raise NotImplementedError(NOT_IMPLEMENTED_ERROR)
115115

116116
def gather_period(self, begin: datetime, end: datetime, coordinates: List[GeoPosition]) -> xr.Dataset:

weather_provider_api/routers/weather/sources/cds/client/cds_api_tools.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,11 @@ class CDSRequest(BaseModel):
8080
"21:00",
8181
"22:00",
8282
"23:00",
83-
]
83+
],
8484
)
8585
data_format: str = "netcdf"
8686
download_format: str = "zip"
87-
area: tuple[float, float, float, float] = Field((7.22, 50.75, 3.2, 53.7))
87+
area: tuple[float, float, float, float] = (53.7, 3.2, 50.75, 7.22)
8888

8989
@property
9090
def request_parameters(self) -> dict[str, str | list[str] | tuple[float]]:
@@ -100,6 +100,7 @@ def request_parameters(self) -> dict[str, str | list[str] | tuple[float]]:
100100
"area": self.area,
101101
"data_format": self.data_format,
102102
"download_format": self.download_format,
103+
"grid_resolution": (0.25, 0.25),
103104
}
104105

105106

weather_provider_api/routers/weather/sources/cds/client/era5_utils.py

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ class Era5UpdateSettings(BaseModel):
4141
maximum_runtime_in_minutes: int = 2 * 60 # 2 hours
4242

4343

44-
def era5_repository_update(update_settings: Era5UpdateSettings) -> RepositoryUpdateResult:
44+
def era5_repository_update(update_settings: Era5UpdateSettings, test_mode: bool) -> RepositoryUpdateResult:
4545
"""A function to update a variant of ERA5 data into the repository."""
4646
starting_moment_of_update = datetime.now(UTC)
4747
cutoff_time = starting_moment_of_update + relativedelta(minutes=update_settings.maximum_runtime_in_minutes)
@@ -54,7 +54,7 @@ def era5_repository_update(update_settings: Era5UpdateSettings) -> RepositoryUpd
5454
logger.info(f" - Maximum runtime: {update_settings.maximum_runtime_in_minutes} minutes ({cutoff_time})")
5555

5656
try:
57-
_era5_update_month_by_month(update_settings, starting_moment_of_update, cutoff_time)
57+
_era5_update_month_by_month(update_settings, starting_moment_of_update, cutoff_time, test_mode)
5858
except Exception as e:
5959
logger.error(f"Failed to update ERA5 data. Reason: {e}")
6060
return RepositoryUpdateResult.failure
@@ -67,7 +67,7 @@ def era5_repository_update(update_settings: Era5UpdateSettings) -> RepositoryUpd
6767

6868

6969
def _era5_update_month_by_month(
70-
update_settings: Era5UpdateSettings, starting_moment_of_update: datetime, cutoff_time: datetime
70+
update_settings: Era5UpdateSettings, starting_moment_of_update: datetime, cutoff_time: datetime, test_mode: bool
7171
):
7272
"""A function to update a variant of ERA5 data into the repository."""
7373
amount_of_months_processed = amount_of_months_not_processable = 0
@@ -90,7 +90,7 @@ def _era5_update_month_by_month(
9090
logger.warning("Maximum runtime reached. Stopping update.")
9191
break
9292

93-
update_result = _era5_update_month(update_settings, update_month)
93+
update_result = _era5_update_month(update_settings, update_month, test_mode)
9494
if update_result == RepositoryUpdateResult.failure:
9595
amount_of_months_not_processable += 1
9696
amount_of_months_processed += 1
@@ -111,7 +111,9 @@ def _era5_update_month_by_month(
111111
logger.info(f"Average time per month: {average_time_per_month_in_minutes} minutes")
112112

113113

114-
def _era5_update_month(update_settings: Era5UpdateSettings, update_month: datetime) -> RepositoryUpdateResult:
114+
def _era5_update_month(
115+
update_settings: Era5UpdateSettings, update_month: datetime, test_mode: bool
116+
) -> RepositoryUpdateResult:
115117
"""A function to update a variant of ERA5 data into the repository."""
116118
logger.debug(f" > Processing month: {update_month.year}-{update_month.month}")
117119

@@ -123,6 +125,9 @@ def _era5_update_month(update_settings: Era5UpdateSettings, update_month: dateti
123125
logger.debug(f" > File {month_file} requires update.")
124126
month_file_name = month_file.with_suffix(Era5FileSuffixes.UNFORMATTED)
125127

128+
# Only the first day of each month in test mode, otherwise all days:
129+
day = [str(i) for i in list(range(1, 32))] if not test_mode else ["1"]
130+
126131
try:
127132
download_era5_data(
128133
update_settings.era5_dataset_to_update_from,
@@ -131,9 +136,8 @@ def _era5_update_month(update_settings: Era5UpdateSettings, update_month: dateti
131136
variables=update_settings.factors_to_process,
132137
year=[str(update_month.year)],
133138
month=[str(update_month.month)],
134-
day=[str(i) for i in list(range(1, 32))],
139+
day=day,
135140
time=[f"{hour:02d}:00" for hour in range(24)],
136-
area=(53.510403, 3.314971, 50.803721, 7.092053),
137141
),
138142
target_location=str(month_file_name),
139143
)
@@ -194,7 +198,6 @@ def _verify_first_day_available_for_era5(update_moment: datetime, update_setting
194198
month=[str(update_moment.month)],
195199
day=[str(update_moment.day)],
196200
time=[f"{hour:02d}:00" for hour in range(2)],
197-
area=(53.510403, 3.314971, 50.803721, 7.092053), # The Netherlands area
198201
),
199202
target_location=tempfile.NamedTemporaryFile().name,
200203
)
@@ -230,7 +233,6 @@ def _finalize_formatted_file(file_path: Path, current_moment: date, verification
230233
logger.error(f" > Failed to remove temporary file {file_path.with_suffix(file_suffix)}: {e}")
231234

232235
# Rename the file to its proper name:
233-
print("RENAMING FILE", current_moment, verification_date, permanent_month, incomplete_month)
234236
if current_moment == verification_date.replace(day=1):
235237
# Current month means an incomplete file
236238
file_path.with_suffix(Era5FileSuffixes.FORMATTED).rename(file_path.with_suffix(Era5FileSuffixes.INCOMPLETE))
@@ -343,15 +345,32 @@ def _recombine_multiple_files(unformatted_file: Path) -> None:
343345
with zipfile.ZipFile(unformatted_file, "r") as zip_ref:
344346
zip_ref.extractall(temp_dir)
345347

346-
# Load the data
347-
348-
data_stream_land_accum = xr.open_dataset(Path(temp_dir).joinpath("data_stream-oper_stepType-accum.nc"))
349-
data_stream_land_instant = xr.open_dataset(Path(temp_dir).joinpath("data_stream-oper_stepType-instant.nc"))
350-
data_stream_wave_instant = xr.open_dataset(Path(temp_dir).joinpath("data_stream-wave_stepType-instant.nc"))
348+
concatenated_dataset = xr.Dataset()
349+
files_to_load_in_order = [
350+
"data_stream-oper_stepType-instant",
351+
"data_stream-oper_stepType-accum",
352+
# "data_stream-wave_stepType-instant",
353+
]
354+
355+
# TODO: Load, convert to dataframe, merge, convert back to xarray
356+
concatenated_dataset = xr.Dataset()
357+
for filename in files_to_load_in_order:
358+
file_path = Path(temp_dir).joinpath(f"{filename}.nc")
359+
if not file_path.exists():
360+
logger.error(f" > Required file {filename}.nc does not exist. Aborting recombination.")
361+
raise FileNotFoundError(f" > Required file {filename}.nc does not exist. Aborting recombination.")
362+
363+
dataset = xr.open_dataset(file_path)
364+
365+
if not concatenated_dataset.data_vars:
366+
concatenated_dataset = dataset.copy(deep=True)
367+
else:
368+
concatenated_dataset = xr.merge(
369+
[concatenated_dataset, dataset], join="outer", compat="no_conflicts", combine_attrs="override"
370+
)
351371

352-
# Merge the data
353-
combined_data = xr.merge([data_stream_land_accum, data_stream_land_instant, data_stream_wave_instant])
354-
combined_data.to_netcdf(unformatted_file, format="NETCDF4", engine="netcdf4")
372+
concatenated_dataset.to_netcdf(unformatted_file, format="NETCDF4", engine="netcdf4")
373+
# raise ValueError("This is not working yet")
355374

356375

357376
def download_era5_data(

weather_provider_api/routers/weather/sources/cds/client/era5land_repository.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from pytz import UTC
88

99
from weather_provider_api.routers.weather.repository.repository import RepositoryUpdateResult, WeatherRepositoryBase
10+
from weather_provider_api.routers.weather.sources.cds.client.cds_api_tools import CDSDataSets
1011
from weather_provider_api.routers.weather.sources.cds.client.era5_utils import (
1112
Era5UpdateSettings,
1213
era5_repository_update,
@@ -52,7 +53,7 @@ def last_day_of_repo(self) -> datetime:
5253
last_day_of_repo = last_day_of_repo.replace(hour=0, minute=0, second=0, microsecond=0)
5354
return last_day_of_repo
5455

55-
def update(self) -> RepositoryUpdateResult:
56+
def update(self, test_mode: bool = False) -> RepositoryUpdateResult:
5657
"""The update implementation for the ERA5 Land repository.
5758
5859
This function handles all the required actions to update the repository completely, but taking into
@@ -69,14 +70,15 @@ def update(self) -> RepositoryUpdateResult:
6970
return era5_repository_update(
7071
Era5UpdateSettings(
7172
filename_prefix=self.file_prefix,
72-
era5_dataset_to_update_from="reanalysis-era5-land",
73+
era5_dataset_to_update_from=CDSDataSets.ERA5LAND,
7374
era5_product_type="reanalysis",
7475
factor_dictionary=era5land_factors,
7576
factors_to_process=[era5land_factors[x] for x in list(era5land_factors.keys())],
7677
maximum_runtime_in_minutes=self.runtime_limit,
7778
repository_time_range=(self.first_day_of_repo, self.last_day_of_repo),
7879
target_storage_location=self.repository_folder,
79-
)
80+
),
81+
test_mode=test_mode,
8082
)
8183

8284
def _delete_files_outside_of_scope(self):

weather_provider_api/routers/weather/sources/cds/client/era5sl_repository.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,17 @@ def last_day_of_repo(self) -> datetime:
5252
last_day_of_repo = last_day_of_repo.replace(hour=0, minute=0, second=0, microsecond=0)
5353
return last_day_of_repo
5454

55-
def update(self) -> RepositoryUpdateResult:
55+
def update(self, test_mode: bool) -> RepositoryUpdateResult:
5656
"""The update implementation for the ERA5 Single Levels repository.
5757
5858
This function handles all the required actions to update the repository completely, but taking into
5959
account its set runtime_limit. If based on the time of completion of other downloaded files this session
6060
the next file wouldn't complete within the runtime_limit, the update process halts.
6161
(if no other downloads were made yet, a generous rough estimate is used).
6262
63+
Args:
64+
test_mode: A boolean indicating whether the update process should run in test mode.
65+
6366
Returns:
6467
A RepositoryUpdateResult value indicating a completion, time-out or failure of the update process
6568
"""
@@ -77,7 +80,8 @@ def update(self) -> RepositoryUpdateResult:
7780
maximum_runtime_in_minutes=self.runtime_limit,
7881
repository_time_range=(self.first_day_of_repo, self.last_day_of_repo),
7982
target_storage_location=self.repository_folder,
80-
)
83+
),
84+
test_mode=test_mode,
8185
)
8286

8387
def _delete_files_outside_of_scope(self):
@@ -130,8 +134,11 @@ def _get_file_list_for_period(self, start: datetime, end: datetime):
130134
file_month = int(file[len_filename_until_date + 5 : len_filename_until_date + 7])
131135
date_for_filename = datetime(year=file_year, month=file_month, day=15).astimezone(UTC)
132136

133-
134-
if start.replace(day=1) < date_for_filename < datetime(year=end.year, month=end.month, day=28).astimezone(UTC):
137+
if (
138+
start.replace(day=1)
139+
< date_for_filename
140+
< datetime(year=end.year, month=end.month, day=28).astimezone(UTC)
141+
):
135142
# If the file is within the requested period, save it to the list of filtered files
136143
list_of_filtered_files.append(file)
137144

weather_provider_api/routers/weather/utils/serializers.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def file_or_text_response(
2626
model_id: str,
2727
request: Union[WeatherContentRequestQuery, WeatherContentRequestMultiLocationQuery],
2828
coords: List[Tuple[float, float]],
29-
):
29+
) -> tuple[ScientificJSONResponse | FileResponse, str | None]:
3030
if response_format == ResponseFormat.json:
3131
return json_response(unserialized_data, coords)
3232
elif response_format == ResponseFormat.json_dataset:
@@ -41,8 +41,8 @@ def file_response(
4141
source_id: str,
4242
model_id: str,
4343
request: WeatherContentRequestQuery,
44-
coords: List[Tuple[float, float]],
45-
):
44+
coords: list[tuple[float, float]],
45+
) -> tuple[FileResponse, str]:
4646
if response_format == ResponseFormat.netcdf4:
4747
file_path = to_netcdf4(unserialized_data)
4848
mime = "application/x-netcdf4"
@@ -64,7 +64,9 @@ def file_response(
6464

6565

6666
def generate_filename(source_id: str, model_id: str, request: WeatherContentRequestQuery, extension: str):
67-
file_name = f"weather_{source_id}_{model_id}_{request.begin}-{request.end}{extension}".replace(" ", "T")
67+
file_name = f"weather_{source_id}_{model_id}_{request.begin}-{request.end}{extension}".replace(" ", "T").replace(
68+
":", ""
69+
)
6870
return file_name
6971

7072

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,28 @@
11
#!/usr/bin/env python
2-
# -*- coding: utf-8 -*-
2+
33

44
# SPDX-FileCopyrightText: 2019-2022 Alliander N.V.
55
# SPDX-License-Identifier: MPL-2.0
6+
import sys
7+
8+
from loguru import logger
69

710
from weather_provider_api.routers.weather.sources.cds.client.era5sl_repository import (
811
ERA5SLRepository,
912
)
1013

1114

12-
def main():
15+
def main(args) -> None:
16+
"""Run the update of the ERA5SL repository."""
17+
test_mode = False
18+
19+
if len(args) == 2 and args[1] == "testmode":
20+
logger.warning("WARNING: Running in test mode")
21+
test_mode = True
22+
1323
era5sl_repo = ERA5SLRepository()
14-
era5sl_repo.update()
24+
era5sl_repo.update(test_mode)
1525

1626

1727
if __name__ == "__main__":
18-
main()
28+
main(sys.argv)

0 commit comments

Comments
 (0)