Remove double-correction of objectives in pareto_store script

enarjord · claude · enarjord · commit 597e18cf629a · 2026-02-24T10:02:16.000-05:00
The ratio-based objective correction in pareto_store.py main() would
apply the agg/mean ratio a second time to entries produced by the fixed
optimizer (which already writes correct aggregated values into
flat_stats), e.g. turning a correct max-aggregated objective of 300
into 600.

Removed the correction block — objectives are now passed through
unchanged. The optimize.py fix ensures new entries have correct
objectives at creation time. Limit filtering (-l) still correctly
uses aggregated values from suite_metrics.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/src/pareto_store.py b/src/pareto_store.py
@@ -609,25 +609,6 @@ def parse_limit_expr(expr: str) -> LimitSpec:
                 )
                 stats_flat.update(stats_flat_suite)
                 aggregated_values.update(aggregated_values_suite)
-                # Correct objectives for scoring metrics whose aggregate method
-                # is not "mean".  The stored w_i was computed as metric_mean * weight;
-                # the correct value is metric_agg * weight.  We apply a ratio
-                # correction (agg / mean) so we don't need the scoring weights.
-                constraint_violation = metrics_block.get("constraint_violation", 0.0)
-                if aggregate_cfg and not constraint_violation:
-                    scoring_keys = entry.get("optimize", {}).get("scoring", [])
-                    for idx, sk in enumerate(scoring_keys):
-                        mode = _resolve_aggregate_mode(sk, aggregate_cfg)
-                        if mode == "mean":
-                            continue
-                        w_key = f"w_{idx}"
-                        stored = objectives.get(w_key)
-                        if stored is None:
-                            continue
-                        agg_val = aggregated_values.get(sk)
-                        mean_val = stats_flat.get(f"{sk}_mean")
-                        if agg_val is not None and mean_val and mean_val != 0.0:
-                            objectives[w_key] = stored * (agg_val / mean_val)
             if not w_keys:
                 all_w_keys = sorted(k for k in objectives if k.startswith("w_"))
 
diff --git a/tests/test_aggregate_methods.py b/tests/test_aggregate_methods.py
@@ -324,13 +324,14 @@ def test_override_changes_objective(self):
 
 
 # ---------------------------------------------------------------------------
-# pareto_store.main() objective correction via ratio
+# pareto_store.main() does NOT mutate stored objectives
 # ---------------------------------------------------------------------------
 
 
-class TestObjectiveCorrectionRatio:
-    """Test the ratio-based correction applied in pareto_store main() for
-    scoring metrics with non-default aggregate methods."""
+class TestObjectivesNotDoubleCorrected:
+    """The optimizer (optimize.py) now writes correct objectives at creation
+    time.  pareto_store.py must NOT apply a second correction on top, which
+    would inflate values for entries produced by the fixed optimizer."""
 
     AGGREGATE_CFG = {
         "default": "mean",
@@ -339,16 +340,11 @@ class TestObjectiveCorrectionRatio:
         "position_held_hours_max": "max",
     }
 
-    def _make_pareto_entry(self, scoring_keys, mean_values, max_values, weights=None):
-        """Build a pareto entry with suite_metrics in the 'metrics' format."""
-        if weights is None:
-            weights = {k: 1.0 for k in scoring_keys}
-        objectives = {}
+    def _make_pareto_entry(self, scoring_keys, objective_values, mean_values, max_values):
+        """Build a pareto entry as the fixed optimizer would produce it."""
+        objectives = {f"w_{i}": v for i, v in enumerate(objective_values)}
         suite_metric_payloads = {}
-        for i, sk in enumerate(scoring_keys):
-            w = weights.get(sk, 1.0)
-            # Stored objectives were computed with _mean (the bug)
-            objectives[f"w_{i}"] = mean_values[sk] * w
+        for sk in scoring_keys:
             suite_metric_payloads[sk] = {
                 "stats": {
                     "mean": mean_values[sk],
@@ -372,102 +368,38 @@ def _make_pareto_entry(self, scoring_keys, mean_values, max_values, weights=None
             "suite_metrics": {"metrics": suite_metric_payloads},
         }
 
-    def test_correction_applied_for_max_aggregate(self):
+    def test_new_entry_objectives_unchanged(self):
+        """Objectives produced by the fixed optimizer must pass through
+        pareto_store untouched — no ratio correction applied."""
         scoring = ["adg_pnl", "high_exposure_hours_max_long"]
         means = {"adg_pnl": 0.001, "high_exposure_hours_max_long": 150.0}
         maxes = {"adg_pnl": 0.001, "high_exposure_hours_max_long": 300.0}
-        entry = self._make_pareto_entry(scoring, means, maxes)
+        # Fixed optimizer: adg_pnl uses mean (0.001), exposure uses max (300)
+        entry = self._make_pareto_entry(scoring, [0.001, 300.0], means, maxes)
 
-        # Extract and correct (same logic as main())
+        # Reproduce pareto_store.main() logic
         metrics_block = entry["metrics"]
-        objectives = dict(metrics_block["objectives"])
-        aggregate_cfg = entry["backtest"]["aggregate"]
-        stats_flat_suite, aggregated_values = _suite_metrics_to_stats(
-            entry, aggregate_cfg=aggregate_cfg,
-        )
-
-        constraint_violation = metrics_block.get("constraint_violation", 0.0)
-        assert not constraint_violation
-
-        for idx, sk in enumerate(scoring):
-            mode = _resolve_aggregate_mode(sk, aggregate_cfg)
-            if mode == "mean":
-                continue
-            w_key = f"w_{idx}"
-            stored = objectives.get(w_key)
-            agg_val = aggregated_values.get(sk)
-            mean_val = stats_flat_suite.get(f"{sk}_mean")
-            if agg_val is not None and mean_val and mean_val != 0.0:
-                objectives[w_key] = stored * (agg_val / mean_val)
-
-        # adg_pnl (mean aggregate) should be unchanged
-        assert objectives["w_0"] == pytest.approx(0.001)
-        # high_exposure_hours_max_long: stored=150*1.0=150, corrected=150*(300/150)=300
-        assert objectives["w_1"] == pytest.approx(300.0)
-
-    def test_correction_preserves_weight_sign(self):
-        """Ratio correction preserves the scoring weight direction."""
-        scoring = ["peak_recovery_hours_pnl"]
-        means = {"peak_recovery_hours_pnl": 200.0}
-        maxes = {"peak_recovery_hours_pnl": 500.0}
-        weights = {"peak_recovery_hours_pnl": 1.0}
-        entry = self._make_pareto_entry(scoring, means, maxes, weights)
-
-        metrics_block = entry["metrics"]
-        objectives = dict(metrics_block["objectives"])
-        aggregate_cfg = entry["backtest"]["aggregate"]
-        stats_flat_suite, aggregated_values = _suite_metrics_to_stats(
-            entry, aggregate_cfg=aggregate_cfg,
-        )
-
-        stored = objectives["w_0"]
-        assert stored == pytest.approx(200.0)  # mean * weight(1.0)
-
-        agg_val = aggregated_values["peak_recovery_hours_pnl"]
-        mean_val = stats_flat_suite["peak_recovery_hours_pnl_mean"]
-        objectives["w_0"] = stored * (agg_val / mean_val)
-
-        assert objectives["w_0"] == pytest.approx(500.0)  # max * weight(1.0)
-
-    def test_no_correction_with_constraint_violation(self):
-        scoring = ["high_exposure_hours_max_long"]
-        means = {"high_exposure_hours_max_long": 150.0}
-        maxes = {"high_exposure_hours_max_long": 300.0}
-        entry = self._make_pareto_entry(scoring, means, maxes)
-        entry["metrics"]["constraint_violation"] = 5000.0
-
-        metrics_block = entry["metrics"]
-        objectives = dict(metrics_block["objectives"])
-        aggregate_cfg = entry["backtest"]["aggregate"]
-
-        constraint_violation = metrics_block.get("constraint_violation", 0.0)
-        # Should skip correction
-        assert constraint_violation
-        # Objective remains at the stored (mean-based) value
-        assert objectives["w_0"] == pytest.approx(150.0)
-
-    def test_no_correction_for_mean_aggregate_metric(self):
-        scoring = ["adg_pnl"]
-        means = {"adg_pnl": 0.001}
-        maxes = {"adg_pnl": 0.0015}
-        entry = self._make_pareto_entry(scoring, means, maxes)
-
-        metrics_block = entry["metrics"]
-        objectives = dict(metrics_block["objectives"])
-        aggregate_cfg = entry["backtest"]["aggregate"]
-
-        mode = _resolve_aggregate_mode("adg_pnl", aggregate_cfg)
-        assert mode == "mean"
-        # No correction needed
+        objectives = dict(metrics_block.get("objectives", metrics_block))
+        aggregate_cfg = entry.get("backtest", {}).get("aggregate")
+        if "suite_metrics" in entry:
+            stats_flat_suite, aggregated_values = _suite_metrics_to_stats(
+                entry, aggregate_cfg=aggregate_cfg,
+            )
+        # main() does NOT modify objectives — verify they are unchanged
         assert objectives["w_0"] == pytest.approx(0.001)
+        assert objectives["w_1"] == pytest.approx(300.0)  # NOT 600
 
-    def test_no_correction_without_aggregate_cfg(self):
+    def test_limit_filtering_uses_aggregated_values(self):
+        """Even without objective correction, -l limit filtering still uses
+        the correctly aggregated values from suite_metrics."""
         scoring = ["high_exposure_hours_max_long"]
         means = {"high_exposure_hours_max_long": 150.0}
         maxes = {"high_exposure_hours_max_long": 300.0}
-        entry = self._make_pareto_entry(scoring, means, maxes)
-        del entry["backtest"]["aggregate"]
+        entry = self._make_pareto_entry(scoring, [300.0], means, maxes)
 
         aggregate_cfg = entry.get("backtest", {}).get("aggregate")
-        assert aggregate_cfg is None
-        # Without cfg, no correction is attempted
+        _, aggregated_values = _suite_metrics_to_stats(
+            entry, aggregate_cfg=aggregate_cfg,
+        )
+        # Limit filtering sees the correct max value (300), not the mean (150)
+        assert aggregated_values["high_exposure_hours_max_long"] == 300.0