@@ -41,7 +41,7 @@ class Era5UpdateSettings(BaseModel):
4141 maximum_runtime_in_minutes : int = 2 * 60 # 2 hours
4242
4343
44- def era5_repository_update (update_settings : Era5UpdateSettings ) -> RepositoryUpdateResult :
44+ def era5_repository_update (update_settings : Era5UpdateSettings , test_mode : bool ) -> RepositoryUpdateResult :
4545 """A function to update a variant of ERA5 data into the repository."""
4646 starting_moment_of_update = datetime .now (UTC )
4747 cutoff_time = starting_moment_of_update + relativedelta (minutes = update_settings .maximum_runtime_in_minutes )
@@ -54,7 +54,7 @@ def era5_repository_update(update_settings: Era5UpdateSettings) -> RepositoryUpd
5454 logger .info (f" - Maximum runtime: { update_settings .maximum_runtime_in_minutes } minutes ({ cutoff_time } )" )
5555
5656 try :
57- _era5_update_month_by_month (update_settings , starting_moment_of_update , cutoff_time )
57+ _era5_update_month_by_month (update_settings , starting_moment_of_update , cutoff_time , test_mode )
5858 except Exception as e :
5959 logger .error (f"Failed to update ERA5 data. Reason: { e } " )
6060 return RepositoryUpdateResult .failure
@@ -67,7 +67,7 @@ def era5_repository_update(update_settings: Era5UpdateSettings) -> RepositoryUpd
6767
6868
6969def _era5_update_month_by_month (
70- update_settings : Era5UpdateSettings , starting_moment_of_update : datetime , cutoff_time : datetime
70+ update_settings : Era5UpdateSettings , starting_moment_of_update : datetime , cutoff_time : datetime , test_mode : bool
7171):
7272 """A function to update a variant of ERA5 data into the repository."""
7373 amount_of_months_processed = amount_of_months_not_processable = 0
@@ -90,7 +90,7 @@ def _era5_update_month_by_month(
9090 logger .warning ("Maximum runtime reached. Stopping update." )
9191 break
9292
93- update_result = _era5_update_month (update_settings , update_month )
93+ update_result = _era5_update_month (update_settings , update_month , test_mode )
9494 if update_result == RepositoryUpdateResult .failure :
9595 amount_of_months_not_processable += 1
9696 amount_of_months_processed += 1
@@ -111,7 +111,9 @@ def _era5_update_month_by_month(
111111 logger .info (f"Average time per month: { average_time_per_month_in_minutes } minutes" )
112112
113113
114- def _era5_update_month (update_settings : Era5UpdateSettings , update_month : datetime ) -> RepositoryUpdateResult :
114+ def _era5_update_month (
115+ update_settings : Era5UpdateSettings , update_month : datetime , test_mode : bool
116+ ) -> RepositoryUpdateResult :
115117 """A function to update a variant of ERA5 data into the repository."""
116118 logger .debug (f" > Processing month: { update_month .year } -{ update_month .month } " )
117119
@@ -123,6 +125,9 @@ def _era5_update_month(update_settings: Era5UpdateSettings, update_month: dateti
123125 logger .debug (f" > File { month_file } requires update." )
124126 month_file_name = month_file .with_suffix (Era5FileSuffixes .UNFORMATTED )
125127
128+ # Only the first day of each month in test mode, otherwise all days:
129+ day = ["1"] if test_mode else [str(i) for i in range(1, 32)]
130+
126131 try :
127132 download_era5_data (
128133 update_settings .era5_dataset_to_update_from ,
@@ -131,9 +136,8 @@ def _era5_update_month(update_settings: Era5UpdateSettings, update_month: dateti
131136 variables = update_settings .factors_to_process ,
132137 year = [str (update_month .year )],
133138 month = [str (update_month .month )],
134- day = [ str ( i ) for i in list ( range ( 1 , 32 ))] ,
139+ day = day ,
135140 time = [f"{ hour :02d} :00" for hour in range (24 )],
136- area = (53.510403 , 3.314971 , 50.803721 , 7.092053 ),
137141 ),
138142 target_location = str (month_file_name ),
139143 )
@@ -194,7 +198,6 @@ def _verify_first_day_available_for_era5(update_moment: datetime, update_setting
194198 month = [str (update_moment .month )],
195199 day = [str (update_moment .day )],
196200 time = [f"{ hour :02d} :00" for hour in range (2 )],
197- area = (53.510403 , 3.314971 , 50.803721 , 7.092053 ), # The Netherlands area
198201 ),
199202 target_location = tempfile .NamedTemporaryFile ().name ,
200203 )
@@ -230,7 +233,6 @@ def _finalize_formatted_file(file_path: Path, current_moment: date, verification
230233 logger .error (f" > Failed to remove temporary file { file_path .with_suffix (file_suffix )} : { e } " )
231234
232235 # Rename the file to its proper name:
233- print ("RENAMING FILE" , current_moment , verification_date , permanent_month , incomplete_month )
234236 if current_moment == verification_date .replace (day = 1 ):
235237 # Current month means an incomplete file
236238 file_path .with_suffix (Era5FileSuffixes .FORMATTED ).rename (file_path .with_suffix (Era5FileSuffixes .INCOMPLETE ))
@@ -271,7 +273,8 @@ def file_requires_update(file_path: Path, current_month: date, verification_date
271273 return True # An update should both clean the UNFORMATTED file and generate a proper one
272274
273275 if not file_path .with_suffix (".nc" ).exists () or file_path .with_suffix (Era5FileSuffixes .INCOMPLETE ).exists ():
274- logger .debug (" > No file exists, or it is still incomplete: UPDATE REQUIRED " )
276+ logger .debug (" > No file exists, or it is still incomplete: UPDATE REQUIRED" )
277+ logger.debug(f" > File path: {file_path}")
275278 return True # No file matching the mask or incomplete files always mean the update is required!
276279
277280 files_in_folder = glob .glob (f"{ file_path } *.nc" )
@@ -343,15 +346,34 @@ def _recombine_multiple_files(unformatted_file: Path) -> None:
343346 with zipfile .ZipFile (unformatted_file , "r" ) as zip_ref :
344347 zip_ref .extractall (temp_dir )
345348
346- # Load the data
347-
348- data_stream_land_accum = xr .open_dataset (Path (temp_dir ).joinpath ("data_stream-oper_stepType-accum.nc" ))
349- data_stream_land_instant = xr .open_dataset (Path (temp_dir ).joinpath ("data_stream-oper_stepType-instant.nc" ))
350- data_stream_wave_instant = xr .open_dataset (Path (temp_dir ).joinpath ("data_stream-wave_stepType-instant.nc" ))
349+ concatenated_dataset = xr .Dataset ()
350+ files_to_load_in_order = [
351+ "data_stream-oper_stepType-instant" ,
352+ "data_stream-oper_stepType-accum" ,
353+ # TODO: Add the following file back in when we can properly handle it
354+ # "data_stream-wave_stepType-instant", # Something about this data doesn't mesh well anymore with the rest...
355+ ]
356+
357+ # TODO: Load, convert to dataframe, merge, convert back to xarray
358+ # NOTE(review): concatenated_dataset is already initialized above; the second xr.Dataset() assignment was redundant and has been dropped
359+ for filename in files_to_load_in_order :
360+ file_path = Path (temp_dir ).joinpath (f"{ filename } .nc" )
361+ if not file_path .exists ():
362+ logger .error (f" > Required file { filename } .nc does not exist. Aborting recombination." )
363+ raise FileNotFoundError(f"Required file {filename}.nc does not exist. Aborting recombination.")
364+
365+ dataset = xr .open_dataset (file_path )
366+ dataset = dataset.drop_vars("expver", errors="raise")  # NOTE(review): errors="raise" aborts recombination if "expver" is absent — confirm that is intended
367+
368+ if not concatenated_dataset .data_vars :
369+ concatenated_dataset = dataset .copy (deep = True )
370+ else :
371+ concatenated_dataset = xr .merge (
372+ [concatenated_dataset , dataset ], join = "outer" , compat = "no_conflicts" , combine_attrs = "override"
373+ )
351374
352- # Merge the data
353- combined_data = xr .merge ([data_stream_land_accum , data_stream_land_instant , data_stream_wave_instant ])
354- combined_data .to_netcdf (unformatted_file , format = "NETCDF4" , engine = "netcdf4" )
375+ concatenated_dataset .to_netcdf (unformatted_file , format = "NETCDF4" , engine = "netcdf4" )
376+
355377
356378
357379def download_era5_data (
0 commit comments