pySTEPS
diff --git a/‎pysteps/blending/utils.py‎
Lines changed: 93 additions & 183 deletions b/‎pysteps/blending/utils.py‎
Lines changed: 93 additions & 183 deletions
diff --git a/‎pysteps/downscaling/rainfarm.py‎
Lines changed: 6 additions & 1 deletion b/‎pysteps/downscaling/rainfarm.py‎
Lines changed: 6 additions & 1 deletion
@@ -19,10 +19,12 @@
 """
 
 import datetime
+from typing import Any
 import warnings
 from pathlib import Path
 
 import numpy as np
+import xarray as xr
 
 from pysteps.cascade import get_method as cascade_get_method
 from pysteps.cascade.bandpass_filters import filter_gaussian
@@ -241,12 +243,7 @@ def blend_optical_flows(flows, weights):
 
 
 def decompose_NWP(
-    R_NWP,
-    NWP_model,
-    analysis_time,
-    timestep,
-    valid_times,
-    output_path,
+    precip_nwp_dataset: xr.Dataset,
     num_cascade_levels=8,
     num_workers=1,
     decomp_method="fft",
@@ -255,7 +252,7 @@ def decompose_NWP(
     normalize=True,
     compute_stats=True,
     compact_output=True,
-):
+) -> xr.Dataset:
     """Decomposes the NWP forecast data into cascades and saves it in
     a netCDF file
 
@@ -269,11 +266,6 @@ def decompose_NWP(
     analysis_time: numpy.datetime64
       The analysis time of the NWP forecast. The analysis time is assumed to be a
       numpy.datetime64 type as imported by the pysteps importer
-    timestep: int
-      Timestep in minutes between subsequent NWP forecast fields
-    valid_times: array_like
-      Array containing the valid times of the NWP forecast fields. The times are
-      assumed to be numpy.datetime64 types as imported by the pysteps importer.
     output_path: str
       The location where to save the file with the NWP cascade. Defaults to the
       path_workdir specified in the rcparams file.
@@ -315,62 +307,41 @@ def decompose_NWP(
 
     Returns
     -------
-    None
+    xarray.Dataset
+      The same dataset as was passed in but with the precip data replaced
+      with decomposed precip data and means and stds added
     """
 
-    if not NETCDF4_IMPORTED:
-        raise MissingOptionalDependency(
-            "netCDF4 package is required to save the decomposed NWP data, "
-            "but it is not installed"
-        )
-
-    # Make a NetCDF file
-    output_date = f"{analysis_time.astype('datetime64[us]').astype(datetime.datetime):%Y%m%d%H%M%S}"
-    outfn = Path(output_path) / f"cascade_{NWP_model}_{output_date}.nc"
-    ncf = netCDF4.Dataset(outfn, "w", format="NETCDF4")
-
-    # Express times relative to the zero time
-    zero_time = np.datetime64("1970-01-01T00:00:00", "ns")
-    valid_times = np.array(valid_times) - zero_time
-    analysis_time = analysis_time - zero_time
-
-    # Set attributes of decomposition method
-    ncf.domain = domain
-    ncf.normalized = int(normalize)
-    ncf.compact_output = int(compact_output)
-    ncf.analysis_time = int(analysis_time)
-    ncf.timestep = int(timestep)
-
-    # Create dimensions
-    ncf.createDimension("time", R_NWP.shape[0])
-    ncf.createDimension("cascade_levels", num_cascade_levels)
-    ncf.createDimension("x", R_NWP.shape[2])
-    ncf.createDimension("y", R_NWP.shape[1])
-
-    # Create variables (decomposed cascade, means and standard deviations)
-    R_d = ncf.createVariable(
-        "pr_decomposed",
-        np.float32,
-        ("time", "cascade_levels", "y", "x"),
-        zlib=True,
-        complevel=4,
-    )
-    means = ncf.createVariable("means", np.float64, ("time", "cascade_levels"))
-    stds = ncf.createVariable("stds", np.float64, ("time", "cascade_levels"))
-    v_times = ncf.createVariable("valid_times", np.float64, ("time",))
-    v_times.units = "nanoseconds since 1970-01-01 00:00:00"
-
-    # The valid times are saved as an array of floats, because netCDF files can't handle datetime types
-    v_times[:] = np.array([np.float64(valid_times[i]) for i in range(len(valid_times))])
-
+    nwp_precip_var = precip_nwp_dataset.attrs["precip_var"]
+    precip_nwp = precip_nwp_dataset[nwp_precip_var].values
     # Decompose the NWP data
-    filter_g = filter_gaussian(R_NWP.shape[1:], num_cascade_levels)
-    fft = utils_get_method(fft_method, shape=R_NWP.shape[1:], n_threads=num_workers)
+    filter_g = filter_gaussian(precip_nwp.shape[1:], num_cascade_levels)
+    fft = utils_get_method(
+        fft_method, shape=precip_nwp.shape[1:], n_threads=num_workers
+    )
     decomp_method, _ = cascade_get_method(decomp_method)
 
-    for i in range(R_NWP.shape[0]):
-        R_ = decomp_method(
-            field=R_NWP[i, :, :],
+    pr_decomposed = np.zeros(
+        (
+            precip_nwp.shape[0],
+            num_cascade_levels,
+            precip_nwp.shape[1],
+            precip_nwp.shape[2],
+        ),
+        dtype=np.float32,
+    )
+    means = np.zeros(
+        (precip_nwp.shape[0], num_cascade_levels),
+        dtype=np.float64,
+    )
+    stds = np.zeros(
+        (precip_nwp.shape[0], num_cascade_levels),
+        dtype=np.float64,
+    )
+
+    for i in range(precip_nwp.shape[0]):
+        decomposed_precip_nwp = decomp_method(
+            field=precip_nwp[i, :, :],
             bp_filter=filter_g,
             fft_method=fft,
             input_domain=domain,
@@ -380,157 +351,96 @@ def decompose_NWP(
             compact_output=compact_output,
         )
 
-        # Save data to netCDF file
-        # print(R_["cascade_levels"])
-        R_d[i, :, :, :] = R_["cascade_levels"]
-        means[i, :] = R_["means"]
-        stds[i, :] = R_["stds"]
+        pr_decomposed[i, :, :, :] = decomposed_precip_nwp["cascade_levels"]
+        means[i, :] = decomposed_precip_nwp["means"]
+        stds[i, :] = decomposed_precip_nwp["stds"]
 
-    # Close the file
-    ncf.close()
-
-
-def compute_store_nwp_motion(
-    precip_nwp,
-    oflow_method,
-    analysis_time,
-    nwp_model,
-    output_path,
+    precip_nwp_dataset = precip_nwp_dataset.assign_coords(
+        cascade_level=(
+            "cascade_level",
+            np.arange(num_cascade_levels),
+            {"long_name": "cascade level", "units": ""},
+        )
+    )
+    precip_nwp_dataset = precip_nwp_dataset.drop_vars(nwp_precip_var)
+    precip_nwp_dataset[nwp_precip_var] = (
+        ["time", "cascade_level", "y", "x"],
+        pr_decomposed,
+    )
+    precip_nwp_dataset["means"] = (["time", "cascade_level"], means)
+    precip_nwp_dataset["stds"] = (["time", "cascade_level"], stds)
+    return precip_nwp_dataset
+
+
+def preprocess_and_store_nwp_data(
+    precip_nwp_dataset: xr.Dataset,
+    oflow_method: str,
+    nwp_model: str,
+    output_path: str | None,
+    decompose_nwp: bool,
+    decompose_kwargs: dict[str, Any] = {},
 ):
     """Computes, per forecast lead time, the velocity field of an NWP model field.
 
     Parameters
     ----------
-    precip_nwp: array-like
-      Array of dimension (n_timesteps, x, y) containing the precipitation forecast
+    precip_nwp_dataset: xarray.Dataset
+      xarray Dataset containing the precipitation forecast
       from some NWP model.
     oflow_method: {'constant', 'darts', 'lucaskanade', 'proesmans', 'vet'}, optional
       An optical flow method from pysteps.motion.get_method.
-    analysis_time: numpy.datetime64
-      The analysis time of the NWP forecast. The analysis time is assumed to be a
-      numpy.datetime64 type as imported by the pysteps importer.
     nwp_model: str
       The name of the NWP model.
     output_path: str, optional
-      The location where to save the file with the NWP velocity fields. Defaults
+      The location where to save the netcdf file with the NWP velocity fields. Defaults
       to the path_workdir specified in the rcparams file.
+    decompose_nwp: bool
+      Defines wether or not the NWP needs to be decomposed before storing. This can
+      be beneficial for performance, because then the decomposition does not need
+      to happen during the blending anymore. It can however also be detrimental because
+      this increases the amount of storage and RAM required for the blending.
+    decompose_kwargs: dict
+      Keyword arguments passed to the decompose_NWP method.
 
     Returns
     -------
     Nothing
     """
 
+    if not NETCDF4_IMPORTED:
+        raise MissingOptionalDependency(
+            "netCDF4 package is required to save the NWP data, "
+            "but it is not installed"
+        )
+
     # Set the output file
+    analysis_time = precip_nwp_dataset.time.values[0]
     output_date = f"{analysis_time.astype('datetime64[us]').astype(datetime.datetime):%Y%m%d%H%M%S}"
-    outfn = Path(output_path) / f"motion_{nwp_model}_{output_date}.npy"
+    outfn = Path(output_path) / f"preprocessed_{nwp_model}_{output_date}.nc"
+    nwp_precip_var = precip_nwp_dataset.attrs["precip_var"]
+    precip_nwp = precip_nwp_dataset[nwp_precip_var].values
 
     # Get the velocity field per time step
-    v_nwp = np.zeros((precip_nwp.shape[0], 2, precip_nwp.shape[1], precip_nwp.shape[2]))
+    v_nwp_x = np.zeros((precip_nwp.shape[0], precip_nwp.shape[1], precip_nwp.shape[2]))
+    v_nwp_y = np.zeros((precip_nwp.shape[0], precip_nwp.shape[1], precip_nwp.shape[2]))
     # Loop through the timesteps. We need two images to construct a motion
     # field, so we can start from timestep 1.
     for t in range(1, precip_nwp.shape[0]):
-        v_nwp[t] = oflow_method(precip_nwp[t - 1 : t + 1, :, :])
+        v_nwp_dataset = oflow_method(precip_nwp_dataset.isel(time=slice(t - 1, t + 1)))
+        v_nwp_x[t] = v_nwp_dataset.velocity_x
+        v_nwp_y[t] = v_nwp_dataset.velocity_y
 
     # Make timestep 0 the same as timestep 1.
-    v_nwp[0] = v_nwp[1]
+    v_nwp_x[0] = v_nwp_x[1]
+    v_nwp_y[0] = v_nwp_y[1]
+    precip_nwp_dataset["velocity_x"] = (["time", "y", "x"], v_nwp_x)
+    precip_nwp_dataset["velocity_y"] = (["time", "y", "x"], v_nwp_y)
 
-    assert v_nwp.ndim == 4, "v_nwp must be a four-dimensional array"
+    if decompose_nwp:
+        precip_nwp_dataset = decompose_NWP(precip_nwp_dataset, **decompose_kwargs)
 
     # Save it as a numpy array
-    np.save(outfn, v_nwp)
-
-
-def load_NWP(input_nc_path_decomp, input_path_velocities, start_time, n_timesteps):
-    """Loads the decomposed NWP and velocity data from the netCDF files
-
-    Parameters
-    ----------
-    input_nc_path_decomp: str
-      Path to the saved netCDF file containing the decomposed NWP data.
-    input_path_velocities: str
-        Path to the saved numpy binary file containing the estimated velocity
-        fields from the NWP data.
-    start_time: numpy.datetime64
-      The start time of the nowcasting. Assumed to be a numpy.datetime64 type
-    n_timesteps: int
-      Number of time steps to forecast
-
-    Returns
-    -------
-    R_d: list
-      A list of dictionaries with each element in the list corresponding to
-      a different time step. Each dictionary has the same structure as the
-      output of the decomposition function
-    uv: array-like
-        Array of shape (timestep,2,m,n) containing the x- and y-components
-      of the advection field for the (NWP) model field per forecast lead time.
-    """
-
-    if not NETCDF4_IMPORTED:
-        raise MissingOptionalDependency(
-            "netCDF4 package is required to load the decomposed NWP data, "
-            "but it is not installed"
-        )
-
-    # Open the file
-    ncf_decomp = netCDF4.Dataset(input_nc_path_decomp, "r", format="NETCDF4")
-    velocities = np.load(input_path_velocities)
-
-    decomp_dict = {
-        "domain": ncf_decomp.domain,
-        "normalized": bool(ncf_decomp.normalized),
-        "compact_output": bool(ncf_decomp.compact_output),
-    }
-
-    # Convert the start time and the timestep to datetime64 and timedelta64 type
-    zero_time = np.datetime64("1970-01-01T00:00:00", "ns")
-    analysis_time = np.timedelta64(int(ncf_decomp.analysis_time), "ns") + zero_time
-
-    timestep = ncf_decomp.timestep
-    timestep = np.timedelta64(timestep, "m")
-
-    valid_times = ncf_decomp.variables["valid_times"][:]
-    valid_times = np.array(
-        [np.timedelta64(int(valid_times[i]), "ns") for i in range(len(valid_times))]
-    )
-    valid_times = valid_times + zero_time
-
-    # Find the indices corresponding with the required start and end time
-    start_i = (start_time - analysis_time) // timestep
-    assert analysis_time + start_i * timestep == start_time
-    end_i = start_i + n_timesteps + 1
-
-    # Check if the requested end time (the forecast horizon) is in the stored data.
-    # If not, raise an error
-    if end_i > ncf_decomp.variables["pr_decomposed"].shape[0]:
-        raise IndexError(
-            "The requested forecast horizon is outside the stored NWP forecast horizon. Either request a shorter forecast horizon or store a longer NWP forecast horizon"
-        )
-
-    # Add the valid times to the output
-    decomp_dict["valid_times"] = valid_times[start_i:end_i]
-
-    # Slice the velocity fields with the start and end indices
-    uv = velocities[start_i:end_i, :, :, :]
-
-    # Initialise the list of dictionaries which will serve as the output (cf: the STEPS function)
-    R_d = list()
-
-    pr_decomposed = ncf_decomp.variables["pr_decomposed"][start_i:end_i, :, :, :]
-    means = ncf_decomp.variables["means"][start_i:end_i, :]
-    stds = ncf_decomp.variables["stds"][start_i:end_i, :]
-
-    for i in range(n_timesteps + 1):
-        decomp_dict["cascade_levels"] = np.ma.filled(
-            pr_decomposed[i], fill_value=np.nan
-        )
-        decomp_dict["means"] = np.ma.filled(means[i], fill_value=np.nan)
-        decomp_dict["stds"] = np.ma.filled(stds[i], fill_value=np.nan)
-
-        R_d.append(decomp_dict.copy())
-
-    ncf_decomp.close()
-    return R_d, uv
+    precip_nwp_dataset.to_netcdf(outfn)
 
 
 def check_norain(precip_arr, precip_thr=None, norain_thr=0.0):
 
@@ -318,7 +318,12 @@ def downscale(
     noise_dataset = xr.Dataset(
         data_vars={precip_var: (["time", "y", "x"], [noise_field])},
         coords={
-            "time": (["time"], precip_dataset.time.values, precip_dataset.time.attrs),
+            "time": (
+                ["time"],
+                precip_dataset.time.values,
+                precip_dataset.time.attrs,
+                precip_dataset.time.encoding,
+            ),
             "y": (
                 ["y"],
                 y_new,