Implement health score computation

lukmaz · The Meridian Authors · commit e23bcf2aec8d · 2026-02-17T13:12:35.000-08:00
PiperOrigin-RevId: 871462554
diff --git a/meridian/analysis/review/constants.py b/meridian/analysis/review/constants.py
@@ -42,3 +42,13 @@
 Q1 = "q1"
 Q3 = "q3"
 BAYESIAN_PPP = "bayesian_ppp"
+
+# Health score constants
+R2_MIDPOINT = 0.5
+R2_STEEPNESS = 15
+FAIL_RATIO_POWER = 0.3
+HEALTH_SCORE_WEIGHT_BASELINE = 0.3
+HEALTH_SCORE_WEIGHT_BAYESIAN_PPP = 0.3
+HEALTH_SCORE_WEIGHT_GOF = 0.1
+HEALTH_SCORE_WEIGHT_PRIOR_POSTERIOR_SHIFT = 0.15
+HEALTH_SCORE_WEIGHT_ROI_CONSISTENCY = 0.15
diff --git a/meridian/analysis/review/results.py b/meridian/analysis/review/results.py
@@ -600,11 +600,13 @@ class ReviewSummary:
     overall_status: The overall status of all checks.
     summary_message: A summary message of all checks.
     results: A list of all check results.
+    health_score: The health score of the model.
   """
 
   overall_status: Status
   summary_message: str
   results: list[CheckResult]
+  health_score: float
 
   def __repr__(self) -> str:
     report = []
@@ -613,6 +615,7 @@ def __repr__(self) -> str:
     report.append("=" * 40)
     report.append(f"Overall Status: {self.overall_status.name}")
     report.append(f"Summary: {self.summary_message}")
+    report.append(f"Health Score: {self.health_score:.1f}")
     report.append("\nCheck Results:")
 
     for result in self.results:
diff --git a/meridian/analysis/review/results_test.py b/meridian/analysis/review/results_test.py
@@ -370,5 +370,35 @@ def test_prior_posterior_shift_result_recommendation(
     self.assertEqual(result.recommendation, expected_recommendation)
 
 
+class ReviewSummaryTest(parameterized.TestCase):
+
+  def test_review_summary_repr(self):
+    mock_result = results.ConvergenceCheckResult(
+        case=results.ConvergenceCases.CONVERGED,
+        config=configs.ConvergenceConfig(),
+        max_rhat=1.0,
+        max_parameter="mock_var",
+    )
+    summary = results.ReviewSummary(
+        overall_status=results.Status.PASS,
+        summary_message="summary",
+        results=[mock_result],
+        health_score=95.2,
+    )
+    expected_repr = """========================================
+Model Quality Checks
+========================================
+Overall Status: PASS
+Summary: summary
+Health Score: 95.2
+
+Check Results:
+----------------------------------------
+Convergence Check:
+  Status: PASS
+  Recommendation: The model has likely converged, as all parameters have R-hat values < 1.2."""
+    self.assertMultiLineEqual(str(summary), expected_repr)
+
+
 if __name__ == "__main__":
   absltest.main()
diff --git a/meridian/analysis/review/reviewer.py b/meridian/analysis/review/reviewer.py
@@ -14,16 +14,19 @@
 
 """Implementation of the runner of the Model Quality Checks."""
 
+from collections.abc import MutableMapping
+import dataclasses
 import typing
 
 import immutabledict
 from meridian import constants
 from meridian.analysis import analyzer as analyzer_module
 from meridian.analysis.review import checks
 from meridian.analysis.review import configs
+from meridian.analysis.review import constants as review_constants
 from meridian.analysis.review import results
 from meridian.model import prior_distribution
-
+import numpy as np
 
 CheckType = typing.Type[checks.BaseCheck]
 ConfigInstance = configs.BaseConfig
@@ -38,6 +41,141 @@
 })
 
 
+def _get_baseline_score(
+    baseline_check_result: results.BaselineCheckResult,
+) -> float:
+  """Returns the score of the Baseline check."""
+  negative_baseline_prob = baseline_check_result.negative_baseline_prob
+  baseline_config = baseline_check_result.config
+  review_threshold = baseline_config.negative_baseline_prob_review_threshold
+  fail_threshold = baseline_config.negative_baseline_prob_fail_threshold
+
+  return 100.0 * (
+      1.0
+      - np.clip(
+          (negative_baseline_prob - review_threshold)
+          / (fail_threshold - review_threshold),
+          0,
+          1,
+      )
+  )
+
+
+def _get_bayesian_ppp_score(
+    bayesian_ppp_check_result: results.BayesianPPPCheckResult,
+) -> float:
+  """Returns the score of the Bayesian PPP check."""
+  bayesian_ppp = bayesian_ppp_check_result.bayesian_ppp
+  bayesian_ppp_config = bayesian_ppp_check_result.config
+  ppp_threshold = bayesian_ppp_config.ppp_threshold
+  return 100.0 if bayesian_ppp > ppp_threshold else 0.0
+
+
+def _get_gof_score(
+    goodness_of_fit_check_result: results.GoodnessOfFitCheckResult,
+) -> float:
+  """Returns the score of the Goodness of Fit check."""
+  r_squared = goodness_of_fit_check_result.metrics.r_squared
+  return 100.0 / (
+      1
+      + np.exp(
+          -review_constants.R2_STEEPNESS
+          * (r_squared - review_constants.R2_MIDPOINT)
+      )
+  )
+
+
+def _get_pps_score(
+    prior_posterior_shift_check_result: results.PriorPosteriorShiftCheckResult,
+) -> float:
+  """Returns the score of the Prior-Posterior Shift check."""
+  prior_posterior_shift_ratio = len(
+      prior_posterior_shift_check_result.no_shift_channels
+  ) / len(prior_posterior_shift_check_result.channel_results)
+  return (
+      100.0
+      * (1.0 - np.clip(prior_posterior_shift_ratio, 0, 1))
+      ** review_constants.FAIL_RATIO_POWER
+  )
+
+
+def _get_roi_consistency_score(
+    roi_consistency_check_result: results.ROIConsistencyCheckResult,
+) -> float:
+  """Returns the score of the ROI Consistency check."""
+  roi_consistency_failure_ratio = sum(
+      1
+      for r in roi_consistency_check_result.channel_results
+      if r.case.status != results.Status.PASS
+  ) / len(roi_consistency_check_result.channel_results)
+  return (
+      100.0
+      * (1.0 - np.clip(roi_consistency_failure_ratio, 0, 1))
+      ** review_constants.FAIL_RATIO_POWER
+  )
+
+
+@dataclasses.dataclass(frozen=True)
+class _HealthScoreComponent:
+  """A component used in the calculation of the overall health score.
+
+  Attributes:
+    check_type: The class of the check this component represents.
+    score_function: A callable that takes the check result and returns a float
+      score.
+    result_type: The expected type of the result object for this check.
+    weight: The weight of this component in the overall health score
+      calculation.
+    is_required: Whether this check is required to be present for the health
+      score to be computed.
+  """
+
+  check_type: CheckType
+  score_function: typing.Callable[[typing.Any], float]
+  result_type: typing.Type[results.CheckResult]
+  weight: float
+  is_required: bool
+
+
+_HEALTH_SCORE_COMPONENTS = (
+    _HealthScoreComponent(
+        check_type=checks.BaselineCheck,
+        score_function=_get_baseline_score,
+        result_type=results.BaselineCheckResult,
+        weight=review_constants.HEALTH_SCORE_WEIGHT_BASELINE,
+        is_required=True,
+    ),
+    _HealthScoreComponent(
+        check_type=checks.BayesianPPPCheck,
+        score_function=_get_bayesian_ppp_score,
+        result_type=results.BayesianPPPCheckResult,
+        weight=review_constants.HEALTH_SCORE_WEIGHT_BAYESIAN_PPP,
+        is_required=True,
+    ),
+    _HealthScoreComponent(
+        check_type=checks.GoodnessOfFitCheck,
+        score_function=_get_gof_score,
+        result_type=results.GoodnessOfFitCheckResult,
+        weight=review_constants.HEALTH_SCORE_WEIGHT_GOF,
+        is_required=True,
+    ),
+    _HealthScoreComponent(
+        check_type=checks.PriorPosteriorShiftCheck,
+        score_function=_get_pps_score,
+        result_type=results.PriorPosteriorShiftCheckResult,
+        weight=review_constants.HEALTH_SCORE_WEIGHT_PRIOR_POSTERIOR_SHIFT,
+        is_required=False,
+    ),
+    _HealthScoreComponent(
+        check_type=checks.ROIConsistencyCheck,
+        score_function=_get_roi_consistency_score,
+        result_type=results.ROIConsistencyCheckResult,
+        weight=review_constants.HEALTH_SCORE_WEIGHT_ROI_CONSISTENCY,
+        is_required=False,
+    ),
+)
+
+
 class ModelReviewer:
   """A tool for executing a series of quality checks on a Meridian model.
 
@@ -57,15 +195,15 @@ def __init__(
       meridian,
   ):
     self._meridian = meridian
-    self._results: list[results.CheckResult] = []
+    self._results: MutableMapping[CheckType, results.CheckResult] = {}
     self._analyzer = analyzer_module.Analyzer(
         model_context=meridian.model_context,
         inference_data=meridian.inference_data,
     )
 
-  def _run_and_handle(self, check_class, config):
-    instance = check_class(self._meridian, self._analyzer, config)  # pytype: disable=not-instantiable
-    self._results.append(instance.run())
+  def _run_and_handle(self, check_class: CheckType, config: configs.BaseConfig):
+    instance: checks.BaseCheck = check_class(self._meridian, self._analyzer, config)  # pytype: disable=not-instantiable
+    self._results[check_class] = instance.run()
 
   def _uses_roi_priors(self):
     """Checks if the model uses ROI priors."""
@@ -102,22 +240,60 @@ def _has_custom_roi_priors(self):
         return True
     return False
 
+  def _compute_health_score(self) -> float:
+    """Computes the health score of the model.
+
+    Raises:
+      ValueError: If any required checks are missing from the results.
+
+    Returns:
+      The computed health score.
+    """
+    missing_checks = [
+        comp.check_type.__name__
+        for comp in _HEALTH_SCORE_COMPONENTS
+        if comp.is_required and comp.check_type not in self._results
+    ]
+    if missing_checks:
+      raise ValueError(
+          "The following required checks results are missing:"
+          f" {missing_checks}."
+      )
+
+    scores_and_weights = [
+        (
+            comp.score_function(
+                typing.cast(comp.result_type, self._results[comp.check_type])
+            ),
+            comp.weight,
+        )
+        for comp in _HEALTH_SCORE_COMPONENTS
+        if comp.check_type in self._results
+    ]
+
+    sum_score = sum(score * weight for score, weight in scores_and_weights)
+    total_weight = sum(weight for _, weight in scores_and_weights)
+
+    return sum_score / total_weight if total_weight else 0.0
+
   def run(self) -> results.ReviewSummary:
     """Executes all checks and generates the final summary."""
-    self._results.clear()
+    self._results = {}
     self._run_and_handle(checks.ConvergenceCheck, configs.ConvergenceConfig())
 
     # Stop if the model did not converge.
     if (
         self._results
-        and self._results[0].case is results.ConvergenceCases.NOT_CONVERGED
+        and self._results[checks.ConvergenceCheck].case
+        is results.ConvergenceCases.NOT_CONVERGED
     ):
       return results.ReviewSummary(
           overall_status=results.Status.FAIL,
           summary_message=(
               "Failed: Model did not converge. Other checks were skipped."
           ),
-          results=self._results,
+          results=list(self._results.values()),
+          health_score=0.0,
       )
 
     # Run all other checks in sequence.
@@ -138,10 +314,11 @@ def run(self) -> results.ReviewSummary:
 
     # Determine the final overall status.
     has_failures = any(
-        res.case.status is results.Status.FAIL for res in self._results
+        res.case.status is results.Status.FAIL for res in self._results.values()
     )
     has_reviews = any(
-        res.case.status is results.Status.REVIEW for res in self._results
+        res.case.status is results.Status.REVIEW
+        for res in self._results.values()
     )
 
     if has_failures and has_reviews:
@@ -167,5 +344,6 @@ def run(self) -> results.ReviewSummary:
     return results.ReviewSummary(
         overall_status=overall_status,
         summary_message=summary_message,
-        results=self._results,
+        results=list(self._results.values()),
+        health_score=self._compute_health_score(),
     )
diff --git a/meridian/analysis/review/reviewer_test.py b/meridian/analysis/review/reviewer_test.py