refactor: split tolerance report generation

Dayuxiaoshui · Dayuxiaoshui · commit a4aa31fd6852 · 2025-11-17T18:31:59.000+08:00
diff --git a/graph_net/analysis_util.py b/graph_net/analysis_util.py
@@ -582,13 +582,13 @@ def print_stat_info(
         print(f"  - Details for tolerance={t_key}:")
         if total_samples > 0:
             # Calculate all aggregated parameters using the dedicated module
-            aggregated_params = samples_statistics.calculate_all_aggregated_parameters(
+            aggregated_params = samples_statistics.calculate_es_components_values(
                 total_samples=total_samples,
                 correct_speedups=correct_speedups,
                 errno2count=errno2count,
-                t_key=t_key,
+                tolerance=t_key,
                 negative_speedup_penalty=negative_speedup_penalty,
-                fpdb=fpdb,
+                b=fpdb,
                 pi=pi,
             )
 
@@ -597,8 +597,12 @@ def print_stat_info(
             lambda_ = aggregated_params["lambda"]
             eta = aggregated_params["eta"]
             gamma = aggregated_params["gamma"]
-            expected_s = aggregated_params["s_t"]
-            expected_es = aggregated_params["es_t"]
+            expected_s = samples_statistics.calculate_s_t_from_aggregated(
+                alpha, beta, lambda_, eta, negative_speedup_penalty, fpdb
+            )
+            expected_es = samples_statistics.calculate_es_t_from_aggregated(
+                alpha, beta, lambda_, eta, gamma, negative_speedup_penalty
+            )
 
             print(
                 f"    - alpha: {alpha:.3f} (Geometric mean speedup of correct samples)"
diff --git a/graph_net/samples_statistics.py b/graph_net/samples_statistics.py
@@ -149,6 +149,10 @@ def calculate_pi(
     """
     correct_count = len(correct_speedups)
     error_count = total_samples - correct_count
+    counted_errors = sum(errno2count.values())
+    assert (
+        error_count == counted_errors
+    ), f"error_count mismatch: got {error_count}, but errno2count sums to {counted_errors}"
     if error_count == 0:
         return {errno: 0.0 for errno in errno2count.keys()}
 
@@ -158,10 +162,36 @@ def calculate_pi(
     return pi
 
 
+def resolve_errno_tolerance(
+    errno2count: dict[int, int], custom_map: dict[int, int] | None
+) -> dict[int, int]:
+    """
+    Build a sorted errno -> tolerance map for downstream gamma calculation.
+
+    Args:
+        errno2count: Observed errno occurrences in the dataset.
+        custom_map: Optional overrides mapping errno to its minimum tolerance threshold.
+
+    Returns:
+        Plain dict (keys in ascending errno order) mapping each errno seen in errno2count
+        to the tolerance value where it becomes tolerated. Defaults to:
+        - errno 1 (accuracy) -> 1
+        - errno >=2 (runtime/compile) -> 3
+    """
+    custom_map = custom_map or {}
+
+    def tolerance_for(errno: int) -> int:
+        if errno in custom_map:
+            return custom_map[errno]
+        return 1 if errno == 1 else 3
+
+    return {errno: tolerance_for(errno) for errno in sorted(errno2count.keys())}
+
+
 def calculate_gamma(
     tolerance: int,
-    get_pi: Callable[[int], float],
-    errno_tolerances: list[int],
+    pi_value4errno: Callable[[int], float],
+    errno_as_tolerances: dict[int, int],
     b: float = 0.1,
 ) -> float:
     """
@@ -172,26 +202,24 @@ def calculate_gamma(
 
     Args:
         tolerance: Tolerance level t
-        get_pi: Function that takes error type index c and returns π_c (proportion of error type c)
-        errno_tolerances: List of tolerance thresholds for each error type.
-            Index corresponds to error type index c, value is the threshold.
-            An error type is tolerated (not penalized) when t >= threshold.
+        pi_value4errno: Function that takes errno and returns π_c (proportion of error type c).
+        errno_as_tolerances: Mapping of errno to tolerance thresholds.
+            An error type is tolerated (not penalized) when t >= threshold for that errno.
         b: Base penalty for severe errors (default: 0.1)
 
     Returns:
         Gamma value (average error penalty)
     """
-    if len(errno_tolerances) == 0:
+    if tolerance <= 0:
         return b
 
-    # Calculate indicator for each error type: 1 if not tolerated, 0 if tolerated
-    pi_sum = 0.0
-    for error_type_index in range(len(errno_tolerances)):
-        pi_c = get_pi(error_type_index)
-        threshold_c = errno_tolerances[error_type_index]
-        # Error type is not tolerated (penalized) when t < threshold
-        indicator = 1 if tolerance < threshold_c else 0
-        pi_sum += pi_c * indicator
+    # Calculate indicator-weighted pi sum for errnos that are not tolerated
+    pi_sum = sum(
+        pi_value
+        for errno, errno_tolerance in errno_as_tolerances.items()
+        for pi_value in [pi_value4errno(errno)]
+        if tolerance < errno_tolerance
+    )
 
     return b**pi_sum
 
@@ -202,7 +230,7 @@ def calculate_s_t_from_aggregated(
     lambda_: float,
     eta: float,
     negative_speedup_penalty: float,
-    fpdb: float,
+    b: float,
 ) -> float:
     """
     Calculate S(t) from aggregated parameters.
@@ -215,15 +243,15 @@ def calculate_s_t_from_aggregated(
         lambda_: Fraction of correct samples
         eta: Fraction of slowdown cases within correct samples
         negative_speedup_penalty: Penalty power p for negative speedup
-        fpdb: Base penalty b for severe errors
+        b: Base penalty for severe errors or accuracy violation
 
     Returns:
         S(t) value calculated from aggregated parameters
     """
     return (
         alpha**lambda_
         * beta ** (lambda_ * eta * negative_speedup_penalty)
-        * fpdb ** (1 - lambda_)
+        * b ** (1 - lambda_)
     )
 
 
@@ -258,85 +286,60 @@ def calculate_es_t_from_aggregated(
     )
 
 
-def calculate_all_aggregated_parameters(
+def calculate_es_components_values(
     total_samples: int,
     correct_speedups: list[float],
     errno2count: dict[int, int],
-    t_key: int,
+    tolerance: int,
     negative_speedup_penalty: float = 0.0,
-    fpdb: float = 0.1,
+    b: float = 0.1,
     pi: dict[int, float] | None = None,
-    errno_tolerance_thresholds: dict[int, int] | None = None,
+    errno_as_tolerance: dict[int, int] | None = None,
 ) -> dict:
     """
-    Calculate all aggregated parameters for a given tolerance level.
-
-    This is a convenience function that calculates all aggregated parameters at once.
+    Calculate aggregated parameters for a given tolerance level.
 
     Args:
         total_samples: Total number of samples
         correct_speedups: List of speedup values for correct samples
         errno2count: Dictionary mapping errno (error number) to their counts.
             Errno values: 1=accuracy, 2=runtime, 3=compilation.
-        t_key: Tolerance level
+        tolerance: Tolerance level
         negative_speedup_penalty: Penalty power p for negative speedup
-        fpdb: Base penalty b for severe errors
+        b: Base penalty for severe errors or accuracy violation
         pi: Dictionary mapping errno to their proportions (calculated at t=1).
             If None, will be calculated from errno2count.
-        errno_tolerance_thresholds: Dictionary mapping errno to their tolerance thresholds.
-            An error type is tolerated (not penalized) when t >= threshold.
-            If None, uses default thresholds: {1: 1} for accuracy errors (errno=1), {2: 3, 3: 3} for others.
+        errno_as_tolerance: Mapping from errno to its minimum tolerance threshold.
+            An error type is tolerated (not penalized) when tolerance >= that threshold.
+            If None, defaults to {1: 1} for accuracy, {2: 3, 3: 3} for others.
 
     Returns:
-        Dictionary containing all aggregated parameters and calculated scores:
+        Dictionary containing ES(t) component values:
         {
             'alpha': float,
             'beta': float,
             'lambda': float,
             'eta': float,
             'gamma': float,
-            'pi': dict[int, float],
-            's_t': float,
-            'es_t': float
+            'pi': dict[int, float]
         }
     """
-    # Use default error tolerance thresholds if not provided
-    if errno_tolerance_thresholds is None:
-        errno_tolerance_thresholds = {}
-        for errno in errno2count.keys():
-            if errno == 1:  # accuracy errors
-                errno_tolerance_thresholds[errno] = 1
-            else:  # runtime (2) or compilation (3) errors
-                errno_tolerance_thresholds[errno] = 3
-
     # Calculate pi if not provided
     if pi is None:
         pi = calculate_pi(errno2count, total_samples, correct_speedups)
 
-    # Convert dictionary-based pi and thresholds to indexed format for calculate_gamma
-    # Create ordered list of errnos for consistent indexing (sorted by errno)
-    errnos = sorted(errno2count.keys())
-    errno_tolerances = [errno_tolerance_thresholds.get(errno, 3) for errno in errnos]
+    # Prepare errno-ordered tolerance mapping for calculate_gamma
+    errno_as_tolerances = resolve_errno_tolerance(errno2count, errno_as_tolerance)
 
-    # Create get_pi function that maps error type index to pi value
-    def get_pi(error_type_index: int) -> float:
-        if error_type_index < len(errnos):
-            errno = errnos[error_type_index]
-            return pi.get(errno, 0.0)
-        return 0.0
+    # Create pi_value4errno function that maps errno to pi value
+    def pi_value4errno(errno: int) -> float:
+        return pi.get(errno, 0.0)
 
     alpha = calculate_alpha(correct_speedups)
     beta = calculate_beta(correct_speedups)
     lambda_ = calculate_lambda(correct_speedups, total_samples)
     eta = calculate_eta(correct_speedups)
-    gamma = calculate_gamma(t_key, get_pi, errno_tolerances, fpdb)
-
-    s_t = calculate_s_t_from_aggregated(
-        alpha, beta, lambda_, eta, negative_speedup_penalty, fpdb
-    )
-    es_t = calculate_es_t_from_aggregated(
-        alpha, beta, lambda_, eta, gamma, negative_speedup_penalty
-    )
+    gamma = calculate_gamma(tolerance, pi_value4errno, errno_as_tolerances, b)
 
     return {
         "alpha": alpha,
@@ -345,6 +348,4 @@ def get_pi(error_type_index: int) -> float:
         "eta": eta,
         "gamma": gamma,
         "pi": pi,
-        "s_t": s_t,
-        "es_t": es_t,
     }
diff --git a/graph_net/verify_aggregated_params.py b/graph_net/verify_aggregated_params.py