Skip to content
This repository was archived by the owner on Jun 2, 2023. It is now read-only.

Commit 3825747

Browse files
authored
Merge pull request #88 from jsadler2/nnse-samplewise
Nnse samplewise
2 parents 9f28c5c + 051f1f2 commit 3825747

File tree

3 files changed

+62
-16
lines changed

3 files changed

+62
-16
lines changed

river_dl/loss_functions.py

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,38 @@ def rmse(y_true, y_pred):
1919
return rmse_loss
2020

2121

22-
def nse(y_true, y_pred):
22+
def sample_avg_nse(y_true, y_pred):
23+
"""
24+
calculate the sample averaged nse, i.e., it will calculate the nse across
25+
each of the samples (the 1st dimension of the arrays) and then average those
26+
"""
2327
y_true = tf.cast(y_true, tf.float32)
2428
y_pred = tf.cast(y_pred, tf.float32)
2529
zero_or_error = tf.where(
2630
tf.math.is_nan(y_true), tf.zeros_like(y_true), y_pred - y_true
2731
)
2832

29-
numerator = tf.reduce_sum(tf.square(zero_or_error))
33+
# add a small value to the deviation to prevent instability
34+
deviation = dev_masked(y_true) + 0.1
35+
36+
numerator_samplewise = tf.reduce_sum(tf.square(zero_or_error), axis=1)
37+
denomin_samplewise = tf.reduce_sum(tf.square(deviation), axis=1)
38+
nse_samplewise = 1 - numerator_samplewise/denomin_samplewise
39+
nse_samplewise_avg = tf.reduce_sum(nse_samplewise)/tf.cast(tf.shape(y_true)[0], tf.float32)
40+
return nse_samplewise_avg
41+
42+
43+
def nse(y_true, y_pred):
44+
y_true = tf.cast(y_true, tf.float32)
45+
y_pred = tf.cast(y_pred, tf.float32)
46+
zero_or_error = tf.where(
47+
tf.math.is_nan(y_true), tf.zeros_like(y_true), y_pred - y_true
48+
)
3049

3150
deviation = dev_masked(y_true)
51+
numerator = tf.reduce_sum(tf.square(zero_or_error))
3252
denominator = tf.reduce_sum(tf.square(deviation))
33-
return 1 - numerator / denominator
53+
return 1 - numerator / denominator
3454

3555

3656
def nnse(y_true, y_pred):
@@ -41,11 +61,20 @@ def nnse_loss(y_true, y_pred):
4161
return 1 - nnse(y_true, y_pred)
4262

4363

64+
def samplewise_nnse_loss(y_true, y_pred):
65+
nnse_val = 1 / (2 - sample_avg_nse(y_true, y_pred))
66+
return 1 - nnse_val
67+
68+
4469
@tf.function
4570
def nnse_masked_one_var(data, y_pred, var_idx):
4671
y_true, y_pred, weights = y_data_components(data, y_pred, var_idx)
4772
return nnse_loss(y_true, y_pred)
4873

74+
@tf.function
75+
def nnse_one_var_samplewise(data, y_pred, var_idx):
76+
y_true, y_pred, weights = y_data_components(data, y_pred, var_idx)
77+
return samplewise_nnse_loss(y_true, y_pred)
4978

5079
@tf.function
5180
def y_data_components(data, y_pred, var_idx):

river_dl/postproc_utils.py

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def prepped_array_to_df(data_array, dates, ids, col_names):
3535
return df
3636

3737

38-
def take_first_half(df):
38+
def take_half(df, first_half=True):
3939
"""
4040
filter out the first or second half of the dates in the predictions. this is to
4141
retain a "test" set of the i/o data for evaluation
@@ -47,9 +47,12 @@ def take_first_half(df):
4747
df.sort_index(inplace=True)
4848
unique_dates = df.index.unique()
4949
halfway_date = unique_dates[int(len(unique_dates) / 2)]
50-
df_first_half = df.loc[:halfway_date]
51-
df_first_half.reset_index(inplace=True)
52-
return df_first_half
50+
if first_half:
51+
df_half = df.loc[:halfway_date]
52+
else:
53+
df_half = df.loc[halfway_date:]
54+
df_half.reset_index(inplace=True)
55+
return df_half
5356

5457

5558
def unscale_output(y_scl, y_std, y_mean, data_cols, logged_q=False):
@@ -197,11 +200,16 @@ def predict(model, io_data, partition, outfile, logged_q=False, half_tst=False):
197200
"""
198201
io_data = get_data_if_file(io_data)
199202

200-
# evaluate training
201-
if partition == "trn" or partition == "tst":
203+
if partition in ["trn", "tst", "ver"]:
202204
pass
203205
else:
204-
raise ValueError('partition arg needs to be "trn" or "tst"')
206+
raise ValueError('partition arg needs to be "trn" or "tst" or "ver"')
207+
208+
if partition == "ver":
209+
partition = "tst"
210+
tst_partition = "ver"
211+
elif partition == "tst":
212+
tst_partition = "tst"
205213

206214
num_segs = len(np.unique(io_data["ids_trn"]))
207215
y_pred = model.predict(io_data[f"x_{partition}"], batch_size=num_segs)
@@ -220,8 +228,12 @@ def predict(model, io_data, partition, outfile, logged_q=False, half_tst=False):
220228
logged_q,
221229
)
222230

223-
if half_tst and partition == "tst":
224-
y_pred_pp = take_first_half(y_pred_pp)
231+
if partition == "tst":
232+
if half_tst and tst_partition == "tst":
233+
y_pred_pp = take_half(y_pred_pp, first_half=True)
234+
235+
if half_tst and tst_partition == "ver":
236+
y_pred_pp = take_half(y_pred_pp, first_half=False)
225237

226238
y_pred_pp.to_feather(outfile)
227239
return y_pred_pp
@@ -372,13 +384,14 @@ def overall_metrics(
372384

373385

374386
def combined_metrics(
375-
pred_trn, pred_tst, obs_temp, obs_flow, grp=None, outfile=None
387+
pred_trn, pred_tst, obs_temp, obs_flow, pred_ver=None, grp=None, outfile=None
376388
):
377389
"""
378390
calculate the metrics for flow and temp and training and test sets for a
379391
given grouping
380392
:param pred_trn: [str] path to training prediction feather file
381393
:param pred_tst: [str] path to testing prediction feather file
394+
:param pred_ver: [str] path to verification prediction feather file
382395
:param obs_temp: [str] path to observations temperature zarr file
383396
:param obs_flow: [str] path to observations flow zarr file
384397
:param group: [str or list] which group the metrics should be computed for.
@@ -393,6 +406,10 @@ def combined_metrics(
393406
tst_temp = overall_metrics(pred_tst, obs_temp, "temp", "tst", grp)
394407
tst_flow = overall_metrics(pred_tst, obs_flow, "flow", "tst", grp)
395408
df_all = [trn_temp, tst_temp, trn_flow, tst_flow]
409+
if pred_ver:
410+
ver_temp = overall_metrics(pred_ver, obs_temp, "temp", "ver", grp)
411+
ver_flow = overall_metrics(pred_ver, obs_flow, "flow", "ver", grp)
412+
df_all.extend([ver_temp, ver_flow])
396413
df_all = pd.concat(df_all, axis=0)
397414
if outfile:
398415
df_all.to_csv(outfile, index=False)

river_dl/rnns.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from __future__ import print_function, division
33
import tensorflow as tf
44
from tensorflow.keras import layers
5-
from river_dl.loss_functions import nnse_masked_one_var
5+
from river_dl.loss_functions import nnse_masked_one_var, nnse_one_var_samplewise
66

77

88
class LSTMModel(tf.keras.Model):
@@ -47,8 +47,8 @@ def train_step(self, data):
4747
with tf.GradientTape(persistent=True) as tape:
4848
y_pred = self(x, training=True) # forward pass
4949

50-
loss_main = nnse_masked_one_var(y, y_pred, 0)
51-
loss_aux = nnse_masked_one_var(y, y_pred, 1)
50+
loss_main = nnse_one_var_samplewise(y, y_pred, 0)
51+
loss_aux = nnse_one_var_samplewise(y, y_pred, 1)
5252

5353
trainable_vars = self.trainable_variables
5454

0 commit comments

Comments
 (0)