@@ -1,4 +1,5 @@
 import bayesflow as bf
+import pytest
 
 
 def num_variables(x: dict):
@@ -47,3 +48,26 @@ def test_root_mean_squared_error(random_estimates, random_targets): |
     assert out["values"].shape == (num_variables(random_estimates),)
     assert out["metric_name"] == "NRMSE"
     assert out["variable_names"] == ["beta_0", "beta_1", "sigma"]
+
+
+def test_expected_calibration_error(pred_models, true_models, model_names):
+    out = bf.diagnostics.metrics.expected_calibration_error(pred_models, true_models, model_names=model_names)
+    assert list(out.keys()) == ["values", "metric_name", "model_names"]
+    assert out["values"].shape == (pred_models.shape[-1],)
+    assert out["metric_name"] == "Expected Calibration Error"
+    assert out["model_names"] == [r"$\mathcal{M}_0$", r"$\mathcal{M}_1$", r"$\mathcal{M}_2$"]
+
+    # returns probs?
+    out = bf.diagnostics.metrics.expected_calibration_error(pred_models, true_models, return_probs=True)
+    assert list(out.keys()) == ["values", "metric_name", "model_names", "probs_true", "probs_pred"]
+    assert len(out["probs_true"]) == pred_models.shape[-1]
+    assert len(out["probs_pred"]) == pred_models.shape[-1]
+    # default: auto model names
+    assert out["model_names"] == ["M_0", "M_1", "M_2"]
+
+    # handles incorrect input?
+    with pytest.raises(Exception):
+        out = bf.diagnostics.metrics.expected_calibration_error(pred_models, true_models, model_names=["a"])
+
+    with pytest.raises(Exception):
+        out = bf.diagnostics.metrics.expected_calibration_error(pred_models, true_models.transpose)