Skip to content

Commit 6883500

Browse files
authored
Merge pull request #20 from ecmwf/fix/no_danu_dep
refactor: port crps logic across from danu to hyve
2 parents e3f5f96 + 91a2952 commit 6883500

File tree

1 file changed

+82
-6
lines changed

1 file changed

+82
-6
lines changed

src/hyve/tools/crps_stations.py

Lines changed: 82 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,86 @@
1010
import numpy as np
1111
import pandas as pd
1212
import xarray as xr
13-
from danu import stats, utils
13+
14+
15+
def crps_minmax(x, y):
16+
"""
17+
Computes CRPS from x using y as reference,
18+
first x dimension must be ensembles, next dimensions can be arbitrary
19+
x: ensemble data (n_ens, n_points)
20+
y: observation/analysis data (n_points)
21+
returns: crps (n_points)
22+
REFERENCE
23+
Hersbach, 2000: Decomposition of the Continuous Ranked Probability Score for Ensemble Prediction Systems.
24+
Weather and Forecasting 15: 559-570.
25+
"""
26+
27+
# first sort ensemble
28+
x.sort(axis=0)
29+
30+
# construct alpha and beta, size nens+1
31+
n_ens = x.shape[0]
32+
shape = (n_ens + 1,) + x.shape[1:]
33+
alpha = np.zeros(shape)
34+
beta = np.zeros(shape)
35+
36+
# x[i+1]-x[i] and x[i]-y[i] arrays
37+
diffxy = x - y.reshape(1, *(y.shape))
38+
diffxx = x[1:] - x[:-1] # x[i+1]-x[i], size ens-1
39+
40+
# if i == 0
41+
alpha[0] = 0
42+
beta[0] = np.fmax(diffxy[0], 0) # x(0)-y
43+
# if i == n_ens
44+
alpha[-1] = np.fmax(-diffxy[-1], 0) # y-x(n)
45+
beta[-1] = 0
46+
# else
47+
alpha[1:-1] = np.fmin(
48+
diffxx, np.fmax(-diffxy[:-1], 0)
49+
) # x(i+1)-x(i) or y-x(i) or 0
50+
beta[1:-1] = np.fmin(diffxx, np.fmax(diffxy[1:], 0)) # 0 or x(i+1)-y or x(i+1)-x(i)
51+
52+
# compute crps
53+
p_exp = (np.arange(n_ens + 1) / float(n_ens)).reshape(n_ens + 1, *([1] * y.ndim))
54+
crps = np.sum(alpha * (p_exp**2) + beta * ((1 - p_exp) ** 2), axis=0)
55+
#
56+
# p = np.arange(n_ens+1)/float(n_ens)
57+
# alpha_mean = alpha.mean(axis=1)
58+
# beat_mean = beta.mean(axis=1)
59+
# crps = alpha_mean*(p**2) + beat_mean*((1-p)**2)
60+
# crps_mean = crps2.sum()
61+
#
62+
# p_exp = np.expand_dims(np.arange(n_ens+1)/float(n_ens), axis=1)
63+
# crps = np.nansum(alpha*(p_exp**2) + beta*((1-p_exp)**2), axis=0)
64+
# crps_mean = crps.mean()
65+
return crps
66+
67+
68+
def crps_masked(x, y):
69+
n_steps = x.shape[0]
70+
71+
mask = np.logical_not(np.isnan(x[0, 0]))
72+
crps = np.ones(y.shape) * np.nan
73+
for i in range(n_steps):
74+
xi = x[i]
75+
yi = y[i]
76+
crps_masked = crps_minmax(xi[:, mask], yi[mask])
77+
crps[i][mask] = crps_masked
78+
79+
return crps
80+
81+
82+
def forecast_crps(x, y, core_dims=["lat", "lon"]):
83+
crps = xr.apply_ufunc(
84+
crps_masked,
85+
x,
86+
y,
87+
input_core_dims=[["ensemble", *core_dims], core_dims],
88+
dask="parallelized",
89+
output_core_dims=[core_dims],
90+
output_dtypes=[np.float32],
91+
)
92+
return crps
1493

1594

1695
@dask.delayed
@@ -58,7 +137,6 @@ def coord_dmh(dates):
58137
return days_months
59138

60139

61-
@utils.timer
62140
def compute_score(
63141
out_dir, reforecast_dir, ds_reanalysis, ds_clim, core_dims, with_init=False
64142
):
@@ -113,14 +191,12 @@ def compute_score(
113191
persistence = ds_reanalysis.sel(time=date_persistence)
114192

115193
crps_pers = persistence_crps(reanalysis, persistence)
116-
crps_refo = stats.forecast_crps(reforecast, reanalysis, core_dims=core_dims)
194+
crps_refo = forecast_crps(reforecast, reanalysis, core_dims=core_dims)
117195
if ds_clim is not None:
118196
log.debug(coord_dmh(date_range))
119197
climatology = ds_clim.sel(time=coord_dmh(date_range))
120198
climatology.coords["time"] = date_range
121-
crps_clim = stats.forecast_crps(
122-
climatology, reanalysis, core_dims=core_dims
123-
)
199+
crps_clim = forecast_crps(climatology, reanalysis, core_dims=core_dims)
124200
crps_refo, crps_pers, crps_clim = dask.compute(
125201
crps_refo, crps_pers, crps_clim
126202
)

0 commit comments

Comments
 (0)