Implement Brier Score/Brier Skill Score (#592)

samland1116 · web-flow · commit a5afc2456235 · 2025-12-08T14:03:01.000-06:00
* adds brier_score to probabilistic metrics, updates routine to calculate metric skill score

* adds new ensemble test data, adds attribute to BS_ATTRS

* updates test
diff --git a/src/teehr/evaluation/metrics.py b/src/teehr/evaluation/metrics.py
@@ -239,11 +239,34 @@ def _post_process_metric_results(
         """
         for model in include_metrics:
             if model.reference_configuration is not None:
+                """
                 self.df = self._calculate_metric_skill_score(
                     model.output_field_name,
                     model.reference_configuration,
                     group_by
                 )
+                """
+                # 1) get the original cols ahead of skill score join
+                original_cols = self.df.columns
+                # 2) calculate skill score sdf
+                sdf = self._calculate_metric_skill_score(
+                    model.output_field_name,
+                    model.reference_configuration,
+                    group_by
+                )
+                # 3) remove original metric column from skill score sdf
+                sdf = sdf.drop(model.output_field_name)
+                # 4) get join columns
+                join_cols = parse_fields_to_list(group_by)
+                # 5) join returned table back to self.df, trim
+                self.df = self.df.join(
+                    sdf,
+                    on=join_cols,
+                    how="left"
+                ).select(
+                    *original_cols,
+                    F.col(f"{model.output_field_name}_skill_score")
+                )
 
             if model.unpack_results:
                 self.df = model.unpack_function(
@@ -292,11 +315,20 @@ def _calculate_metric_skill_score(
             temp_col = f"{config}_{metric_field}_skill"
             pivot_sdf = pivot_sdf.withColumn(
                 temp_col,
-                1 - F.col(config) / F.col(reference_configuration)
+                1 - F.try_divide(F.col(config), F.col(reference_configuration))
             ).withColumn(
                 "configuration_name",
                 F.lit(config)
             )
+            # warn user if try_divide results in nulls (division by zero)
+            null_count = pivot_sdf.filter(F.col(temp_col).isNull()).count()
+            if null_count > 0:
+                logger.warning(
+                    f"Division by zero encountered when calculating skill "
+                    f"score for configuration '{config}' relative to "
+                    f"reference configuration '{reference_configuration}'. "
+                    f"{null_count} null values were produced."
+                )
             # Join skill score values from the pivot table.
             join_cols = group_by_strings + ["configuration_name"]
             sdf = sdf.join(
diff --git a/src/teehr/metrics/probabilistic_funcs.py b/src/teehr/metrics/probabilistic_funcs.py
@@ -104,3 +104,89 @@ def ensemble_crps_inner(
             )
 
     return ensemble_crps_inner
+
+
+def _get_brier_score_inputs(pivoted_dict: dict,
+                            threshold: float) -> dict:
+    """Obtain inputs for scoringrules.brier_score from pivoted dict.
+
+    Parameters
+    ----------
+    pivoted_dict : dict
+        Dictionary with 'primary' and 'secondary' entries.
+        NOTE(review): presumably the output of ``_pivot_by_member`` with
+        array-like values -- confirm shapes against that helper.
+    threshold : float
+        Quantile level passed to ``np.quantile`` on the primary values.
+        The resulting quantile value (not ``threshold`` itself) is the
+        cut-off used to define the binary event.
+
+    Returns
+    -------
+    dict
+        'primary': 0/1 array flagging primary values at or above the
+        cut-off. 'secondary': the 0/1 secondary array itself when it is
+        1-D, otherwise its mean along axis 1 (fraction at or above the
+        cut-off).
+    """
+    # convert the quantile level into a value cut-off, derived from the
+    # primary values only
+    p = pivoted_dict['primary']
+    q_threshold = np.quantile(p, threshold)
+
+    # get binary outcomes of observed exceeding threshold
+    # (">=": values equal to the cut-off count as an event)
+    binary_p = np.where(p >= q_threshold, 1, 0)
+
+    # get fraction of ensemble members exceeding threshold for each time step
+    # (the same primary-derived cut-off is applied to the secondary values)
+    s = pivoted_dict['secondary']
+    binary_s = np.where(s >= q_threshold, 1, 0)
+    if len(binary_s.shape) == 1:
+        # only one ensemble member
+        frac_exceeds_s = binary_s
+    else:
+        frac_exceeds_s = np.mean(binary_s, axis=1)
+
+    # assemble inputs dict
+    brier_score_inputs = {
+        'primary': binary_p,
+        'secondary': frac_exceeds_s
+    }
+
+    return brier_score_inputs
+
+
+def ensemble_brier_score(model: MetricsBasemodel) -> Callable:
+    """Create the Brier Score ensemble metric function.
+
+    Parameters
+    ----------
+    model : MetricsBasemodel
+        The metric model. The returned function reads ``model.threshold``,
+        ``model.summary_func`` and ``model.backend`` each time it is called.
+
+    Returns
+    -------
+    Callable
+        The inner function ``ensemble_brier_score_inner(p, s, members)``.
+    """
+    logger.debug("Building the Brier Score ensemble metric func.")
+
+    def ensemble_brier_score_inner(
+        p: pd.Series,
+        s: pd.Series,
+        members: pd.Series,
+    ) -> float:
+        """Compute the Brier Score with scoringrules brier_score.
+
+        The event threshold is not an argument of this function; it is
+        taken from ``model.threshold`` of the enclosing scope.
+
+        Parameters
+        ----------
+        p : pd.Series
+            The primary values.
+        s : pd.Series
+            The secondary values.
+        members : pd.Series
+            The member IDs.
+
+        Returns
+        -------
+        float
+            ``model.summary_func`` applied to the scoringrules result when
+            a summary function is set; otherwise the scoringrules result
+            is returned unchanged.
+            NOTE(review): without a summary function the result may be an
+            array rather than a single float -- confirm.
+        """
+        # lazy load scoringrules
+        import scoringrules as sr
+
+        # NOTE(review): no transform is applied to p/s in this function (an
+        # earlier `_transform` / `_pivot_by_value_time` path is disabled);
+        # the inputs go straight to `_pivot_by_member`. Confirm intended.
+        pivoted_dict = _pivot_by_member(p, s, members)
+
+        # turn the pivoted values into 0/1 outcomes and exceedance fractions
+        bs_inputs = _get_brier_score_inputs(
+            pivoted_dict,
+            model.threshold
+        )
+
+        # both branches make the same scoringrules call; only the optional
+        # summary function differs
+        if model.summary_func is not None:
+            return model.summary_func(
+                sr.brier_score(
+                    bs_inputs["primary"],
+                    bs_inputs["secondary"],
+                    backend=model.backend
+                )
+            )
+        else:
+            return sr.brier_score(
+                bs_inputs["primary"],
+                bs_inputs["secondary"],
+                backend=model.backend
+            )
+
+    return ensemble_brier_score_inner
diff --git a/src/teehr/models/metrics/metric_attributes.py b/src/teehr/models/metrics/metric_attributes.py
@@ -255,6 +255,15 @@
     "requires_threshold_field": False,
 }
 
+# Static attributes for the ensemble Brier Score metric; referenced as
+# tma.BS_ENSEMBLE_ATTRS by the BrierScore model in probabilistic_models.py.
+BS_ENSEMBLE_ATTRS = {
+    "short_name": "brier_score_ensemble",
+    "display_name": "Brier Score - Ensemble",
+    "category": mc.Probabilistic,
+    "value_range": [0.0, 1.0],
+    "optimal_value": 0.0,
+    "requires_threshold_field": False,
+}
+
 FDC_SLOPE_ATTRS = {
     "short_name": "fdc_slope",
     "display_name": "Flow Duration Curve Slope",
diff --git a/src/teehr/models/metrics/probabilistic_models.py b/src/teehr/models/metrics/probabilistic_models.py
@@ -46,6 +46,41 @@ class CRPS(ProbabilisticBasemodel):
     attrs: Dict = Field(default=tma.CRPS_ENSEMBLE_ATTRS, frozen=True)
 
 
+class BrierScore(ProbabilisticBasemodel):
+    """Brier Score for ensemble probabilistic forecasts.
+
+    Parameters
+    ----------
+    threshold : float
+        The quantile level used to define the binary event, by default 0.75.
+        ``probabilistic_funcs._get_brier_score_inputs`` converts it to a
+        value cut-off with ``np.quantile`` over the primary values.
+    transform : TransformEnum
+        The transform setting, by default None.
+        NOTE(review): ``probabilistic_funcs.ensemble_brier_score`` does not
+        reference ``model.transform`` -- confirm whether this field has any
+        effect for this metric.
+    backend : str
+        The backend to use, by default "numba". Can be ("numba" or "numpy").
+    summary_func : Callable
+        The function to apply to the results, by default None. When None,
+        the scoringrules result is returned without summarizing.
+    output_field_name : str
+        The output field name, by default "mean_brier_score".
+    func : Callable
+        The function to apply to the data, by default
+        :func:`probabilistic_funcs.ensemble_brier_score`.
+    input_field_names : Union[str, StrEnum, List[Union[str, StrEnum]]]
+        The input field names, by default
+        ["primary_value", "secondary_value", "member"].
+    attrs : Dict
+        The static attributes for the metric.
+    """
+
+    threshold: float = Field(default=0.75)
+    transform: TransformEnum = Field(default=None)
+    backend: str = Field(default="numba")
+    output_field_name: str = Field(default="mean_brier_score")
+    func: Callable = Field(probabilistic_funcs.ensemble_brier_score, frozen=True)
+    summary_func: Union[Callable, None] = Field(default=None)
+    input_field_names: Union[str, StrEnum, List[Union[str, StrEnum]]] = Field(
+        default=["primary_value", "secondary_value", "member"]
+    )
+    attrs: Dict = Field(default=tma.BS_ENSEMBLE_ATTRS, frozen=True)
+
+
 class ProbabilisticMetrics:
     """Define and customize probalistic metrics.
 
@@ -59,3 +94,4 @@ class ProbabilisticMetrics:
     """
 
     CRPS = CRPS
+    BrierScore = BrierScore
diff --git a/tests/data/setup_v0_5_ensemble_study.py b/tests/data/setup_v0_5_ensemble_study.py
@@ -0,0 +1,76 @@
+import teehr
+from pathlib import Path
+import pandas as pd
+from shapely.geometry import Point
+import geopandas as gpd
+
+TEST_STUDY_DATA_DIR_V0_5 = Path("tests", "data", "v0_5_ensemble_study")
+
+
+def setup_v0_5_ensemble_study(tmpdir):
+    """Create a test evaluation with ensemble forecasts using teehr.
+
+    Parameters
+    ----------
+    tmpdir
+        Directory passed to ``teehr.Evaluation(dir_path=...)``.
+
+    Returns
+    -------
+    teehr.Evaluation
+        The evaluation after the location, crosswalk, configuration,
+        variable and primary/secondary timeseries data have been loaded
+        and the joined timeseries table has been created.
+    """
+    # define pathing
+    # NOTE: TEST_STUDY_DATA_DIR_V0_5 is a relative path, so these resolve
+    # against the current working directory (presumably the repo root).
+    location_xwalk_path = TEST_STUDY_DATA_DIR_V0_5 / "location_crosswalks.parquet"
+    primary_ts_path = TEST_STUDY_DATA_DIR_V0_5 / "primary_timeseries.parquet"
+    secondary_ts_path = TEST_STUDY_DATA_DIR_V0_5 / "secondary_timeseries.parquet"
+    configurations_path = TEST_STUDY_DATA_DIR_V0_5 / "configurations.parquet"
+    variables_path = TEST_STUDY_DATA_DIR_V0_5 / "variables.parquet"
+
+    # initialize evaluation
+    ev = teehr.Evaluation(dir_path=tmpdir)
+    ev.enable_logging()
+    ev.clone_template()
+
+    # create locations (a single hard-coded point location)
+    location_dict = {
+        'id': 'obs-GILN6',
+        'name': 'Schoharie Creek at Schoharie',
+        'geometry': Point(-74.45043182373047, 42.397300720214844),
+    }
+    gdf = gpd.GeoDataFrame(
+        [location_dict],
+        geometry='geometry',
+        crs="EPSG:4269"
+        )
+    ev.locations.load_dataframe(df=gdf, write_mode="overwrite")
+
+    # load crosswalk
+    ev.location_crosswalks.load_parquet(
+        in_path=location_xwalk_path
+    )
+
+    # add configurations (one teehr.Configuration per row of the parquet)
+    df = pd.read_parquet(configurations_path)
+    for _, row in df.iterrows():
+        ev.configurations.add(
+            teehr.Configuration(
+                name=row["name"],
+                type=row["type"],
+                description=row["description"]
+            )
+        )
+
+    # add variables table (one teehr.Variable per row of the parquet)
+    df = pd.read_parquet(variables_path)
+    for _, row in df.iterrows():
+        ev.variables.add(
+            teehr.Variable(
+                name=row["name"],
+                long_name=row["long_name"],
+            )
+        )
+
+    # load primary timeseries
+    ev.primary_timeseries.load_parquet(
+        in_path=primary_ts_path
+    )
+
+    # load secondary timeseries
+    ev.secondary_timeseries.load_parquet(
+        in_path=secondary_ts_path
+    )
+
+    # create the joined timeseries (JTS) table
+    ev.joined_timeseries.create(add_attrs=False, execute_scripts=True)
+
+    return ev
diff --git a/tests/data/v0_5_ensemble_study/configurations.parquet b/tests/data/v0_5_ensemble_study/configurations.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:494922481bbc395a13192449163db43d31ea7eae1b3fb519bb6370b816fb5fc6
+size 3252
diff --git a/tests/data/v0_5_ensemble_study/location_crosswalks.parquet b/tests/data/v0_5_ensemble_study/location_crosswalks.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c29d9bf222a29f0b61f08c290350b0692a9a13e4e98364073cc5a21dec412977
+size 2598
diff --git a/tests/data/v0_5_ensemble_study/primary_timeseries.parquet b/tests/data/v0_5_ensemble_study/primary_timeseries.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be10143a8baef8e794384d0ff4fe735afd0b89249fff66bc28a9a24edf2c745d
+size 663557
diff --git a/tests/data/v0_5_ensemble_study/secondary_timeseries.parquet b/tests/data/v0_5_ensemble_study/secondary_timeseries.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:165184dd98de07d5be1b039a116329d1091f3bc9efb5c9d1124a58ca4a01ab84
+size 1665132
diff --git a/tests/data/v0_5_ensemble_study/variables.parquet b/tests/data/v0_5_ensemble_study/variables.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e007ba5f8f4f11a9a392a0ed1b9f1b39ad48accd8ced199b8ac3129ce43ba95
+size 2574
diff --git a/tests/query/test_get_metrics_query.py b/tests/query/test_get_metrics_query.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+version https://git-lfs.github.com/spec/v1`
	`2`	`+oid sha256:494922481bbc395a13192449163db43d31ea7eae1b3fb519bb6370b816fb5fc6`
	`3`	`+size 3252`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+version https://git-lfs.github.com/spec/v1`
	`2`	`+oid sha256:c29d9bf222a29f0b61f08c290350b0692a9a13e4e98364073cc5a21dec412977`
	`3`	`+size 2598`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+version https://git-lfs.github.com/spec/v1`
	`2`	`+oid sha256:be10143a8baef8e794384d0ff4fe735afd0b89249fff66bc28a9a24edf2c745d`
	`3`	`+size 663557`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+version https://git-lfs.github.com/spec/v1`
	`2`	`+oid sha256:165184dd98de07d5be1b039a116329d1091f3bc9efb5c9d1124a58ca4a01ab84`
	`3`	`+size 1665132`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+version https://git-lfs.github.com/spec/v1`
	`2`	`+oid sha256:5e007ba5f8f4f11a9a392a0ed1b9f1b39ad48accd8ced199b8ac3129ce43ba95`
	`3`	`+size 2574`