Merge pull request #1237 from vespa-engine/boeker/handle-searchable-copies

thomasht86 · web-flow · commit c47faa49c306 · 2026-02-23T15:40:52.000+01:00
Handle multiple searchable copies in postFilterThreshold suggestion
diff --git a/tests/unit/test_evaluator.py b/tests/unit/test_evaluator.py
@@ -3221,7 +3221,28 @@ class MockVespaApp:
             def query_many(self, queries, max_concurrent=100, **kwargs):
                 return [SuccessfullMockVespaResponse()]
 
+        class SuccessfullMockTwoSCVespaResponse(SuccessfullMockVespaResponse):
+            def __init__(
+                self, hits=[], _total_count=None, _timing=None, _status_code=200
+            ):
+                super().__init__(hits, _total_count, _timing, _status_code)
+
+            def get_json(self):
+                super_trace = super().get_json()
+                super_trace["trace"]["children"][0]["[0]"]["global_filter"][
+                    "upper_limit"
+                ] = 0.489
+                super_trace["trace"]["children"][0]["[1]"]["global_filter"][
+                    "upper_limit"
+                ] = 0.51
+                return super_trace
+
+        class MockTwoSCVespaApp:
+            def query_many(self, queries, max_concurrent=100, **kwargs):
+                return [SuccessfullMockTwoSCVespaResponse()]
+
         self.mock_app = MockVespaApp()
+        self.two_sc_mock_app = MockTwoSCVespaApp()
 
     def test_run(self):
         hitratio_evaluator = VespaNNGlobalFilterHitratioEvaluator(
@@ -3233,6 +3254,19 @@ def test_run(self):
         self.assertAlmostEqual(hitratios[0][0], 0.01, delta=0.001)
         self.assertAlmostEqual(hitratios[0][1], 0.02, delta=0.001)
 
+    def test_get_searchable_copies(self):
+        hitratio_evaluator = VespaNNGlobalFilterHitratioEvaluator(
+            [{"yql": "foo"}], self.mock_app, verify_target_hits=100
+        )
+        hitratio_evaluator.run()
+        self.assertEqual(hitratio_evaluator.get_searchable_copies(), 1)
+
+        two_sc_hitratio_evaluator = VespaNNGlobalFilterHitratioEvaluator(
+            [{"yql": "foo"}], self.two_sc_mock_app, verify_target_hits=100
+        )
+        two_sc_hitratio_evaluator.run()
+        self.assertEqual(two_sc_hitratio_evaluator.get_searchable_copies(), 2)
+
 
 class TestVespaNNRecallEvaluator(unittest.TestCase):
     """Test the VespaNNRecallEvaluator class."""
@@ -3475,6 +3509,10 @@ def query_many(self, queries, max_concurrent=100, **kwargs):
             self.mock_app, [], 100, buckets_per_percent=1
         )  # 100 buckets
 
+        self.two_sc_optimizer = VespaNNParameterOptimizer(
+            self.mock_app, [], 100, buckets_per_percent=2
+        )  # 200 buckets
+
         # Percentages: 1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99
         self.buckets = [2, 10, 20, 40, 60, 80, 100, 120, 140, 160, 180, 190, 198]
         self.num = len(self.buckets)
@@ -3497,6 +3535,10 @@ def query_many(self, queries, max_concurrent=100, **kwargs):
         ]
         self.optimizer.distribute_to_buckets(self.queries_with_hitratios)
         self.optimizerOneBucket.distribute_to_buckets(self.queries_with_hitratios)
+        self.two_sc_optimizer.distribute_to_buckets(
+            list(map(lambda x: (x[0], x[1] / 2), self.queries_with_hitratios))
+        )
+        self.two_sc_optimizer.searchable_copies = 2
 
     def test_get_bucket_interval_width(self):
         self.assertAlmostEqual(
@@ -3672,17 +3714,16 @@ def test_get_query_distribution(self):
 
     def _assert_post_filter_threshold(
         self,
+        optimizer,
+        buckets,
         response_times_post_filtering,
         recall_post_filtering,
         response_times_pre_filtering,
         recall_pre_filtering,
         lower,
         upper,
     ):
-        buckets = self.buckets
-        filtered_out_ratios = [
-            self.optimizer.bucket_to_filtered_out(b) for b in buckets
-        ]
+        filtered_out_ratios = [optimizer.bucket_to_filtered_out(b) for b in buckets]
 
         benchmark_post_filtering = BucketedMetricResults(
             metric_name="searchtime",
@@ -3710,7 +3751,7 @@ def _assert_post_filter_threshold(
             filtered_out_ratios=filtered_out_ratios,
         )
 
-        post_filter_threshold = self.optimizer._suggest_post_filter_threshold(
+        post_filter_threshold = optimizer._suggest_post_filter_threshold(
             benchmark_post_filtering,
             recall_post_filtering,
             benchmark_pre_filtering,
@@ -3722,6 +3763,8 @@ def _assert_post_filter_threshold(
     def test_suggest_post_filter_threshold(self):
         # Should be somewhere between 40 and 50 percent
         self._assert_post_filter_threshold(
+            self.optimizer,
+            self.buckets,
             [5.0, 5.0, 5.0, 10.0, 10.0, 10.0, 15.0, 15.0, 15.0, 20.0, 20.0, 20.0, 25.0],
             [0.80] * self.num,
             [13.0] * self.num,
@@ -3732,6 +3775,8 @@ def test_suggest_post_filter_threshold(self):
 
         # Should switch earlier since recall becomes bad
         self._assert_post_filter_threshold(
+            self.optimizer,
+            self.buckets,
             [5.0, 5.0, 5.0, 10.0, 10.0, 10.0, 15.0, 15.0, 15.0, 20.0, 20.0, 20.0, 25.0],
             [
                 0.80,
@@ -3756,6 +3801,8 @@ def test_suggest_post_filter_threshold(self):
 
         # Should not switch since recall too bad
         self._assert_post_filter_threshold(
+            self.optimizer,
+            self.buckets,
             [5.0, 5.0, 5.0, 10.0, 10.0, 10.0, 15.0, 15.0, 15.0, 20.0, 20.0, 20.0, 25.0],
             [0.70] * self.num,
             [13.0] * self.num,
@@ -3766,6 +3813,8 @@ def test_suggest_post_filter_threshold(self):
 
         # Should not switch since response time bad
         self._assert_post_filter_threshold(
+            self.optimizer,
+            self.buckets,
             [25.0] * self.num,
             [0.80] * self.num,
             [13.0] * self.num,
@@ -3774,6 +3823,60 @@ def test_suggest_post_filter_threshold(self):
             0.001,
         )
 
+    def test_suggest_post_filter_threshold_with_two_searchable_copies(self):
+        two_sc_buckets = self.two_sc_optimizer.get_non_empty_buckets()
+        two_sc_num = len(two_sc_buckets)
+
+        # Should not use post-filtering since its response time is worse than pre-filtering
+        self._assert_post_filter_threshold(
+            self.two_sc_optimizer,
+            two_sc_buckets,
+            [25.0] * two_sc_num,
+            [0.80] * two_sc_num,
+            [13.0] * two_sc_num,
+            [0.80] * two_sc_num,
+            0.0,
+            0.001,
+        )
+
+        # Should be somewhere between 40 and 50 percent
+        self._assert_post_filter_threshold(
+            self.two_sc_optimizer,
+            two_sc_buckets,
+            [5.0, 5.0, 5.0, 10.0, 10.0, 10.0, 15.0, 15.0, 15.0, 20.0, 20.0, 20.0, 25.0],
+            [0.80] * two_sc_num,
+            [13.0] * two_sc_num,
+            [0.80] * two_sc_num,
+            0.40,
+            0.50,
+        )
+
+        # Should switch earlier since recall becomes bad
+        self._assert_post_filter_threshold(
+            self.two_sc_optimizer,
+            two_sc_buckets,
+            [5.0, 5.0, 5.0, 10.0, 10.0, 10.0, 15.0, 15.0, 15.0, 20.0, 20.0, 20.0, 25.0],
+            [
+                0.80,
+                0.80,
+                0.70,
+                0.70,
+                0.70,
+                0.70,
+                0.70,
+                0.70,
+                0.70,
+                0.70,
+                0.70,
+                0.70,
+                0.70,
+            ],
+            [13] * two_sc_num,
+            [0.80] * two_sc_num,
+            0.05,
+            0.10,
+        )
+
     def _assert_approximate_threshold(
         self,
         response_times_exact,
diff --git a/vespa/evaluation/_base.py b/vespa/evaluation/_base.py
@@ -1780,6 +1780,7 @@ def __init__(
         self.queries = queries
         self.app = app
         self.verify_target_hits = verify_target_hits
+        self.searchable_copies = None
 
     def run(self):
         """
@@ -1834,6 +1835,16 @@ def extract_from_trace(obj: dict, type_name: str):
                     and blueprint["global_filter"]["calculated"]
                 ):
                     hit_ratios.append(blueprint["global_filter"]["hit_ratio"])
+                    actual_upper_limit = blueprint["global_filter"]["upper_limit"]
+                    if actual_upper_limit is not None and actual_upper_limit > 0.0:
+                        searchable_copies = round(1.0 / actual_upper_limit)
+                        if self.searchable_copies is None:
+                            self.searchable_copies = searchable_copies
+                        else:
+                            if self.searchable_copies != searchable_copies:
+                                print(
+                                    f"Searchable copies mismatch: {searchable_copies} vs. {self.searchable_copies} found earlier"
+                                )
 
                 if (
                     self.verify_target_hits is not None
@@ -1847,6 +1858,15 @@ def extract_from_trace(obj: dict, type_name: str):
 
         return all_hit_ratios
 
+    def get_searchable_copies(self) -> int | None:
+        """
+        Returns the number of searchable copies determined during hit-ratio computation.
+
+        Returns:
+            int | None: Number of searchable copies used by the Vespa application, or None if it has not been determined.
+        """
+        return self.searchable_copies
+
 
 class VespaNNRecallEvaluator:
     """
@@ -2188,6 +2208,8 @@ def __init__(
         self.max_concurrent = max_concurrent
         self.id_field = id_field
 
+        self.searchable_copies = None
+
     def get_bucket_interval_width(self) -> float:
         """
         Gets the width of the interval represented by a single bucket.
@@ -2336,6 +2358,7 @@ def determine_hit_ratios_and_distribute_to_buckets(
             queries, self.app, verify_target_hits=self.hits
         )
         hitratio_list = hitratio_evaluator.run()
+        self.searchable_copies = hitratio_evaluator.get_searchable_copies()
 
         for i in range(0, len(hitratio_list)):
             hitratios = hitratio_list[i]
@@ -2890,7 +2913,12 @@ def _suggest_post_filter_threshold(
                 threshold = i
                 response_time_gain = current_gain
 
-        return self.bucket_to_hitratio(threshold)
+        suggestion = self.bucket_to_hitratio(threshold)
+        if self.searchable_copies is not None:
+            suggestion = suggestion * self.searchable_copies
+            suggestion = min(suggestion, 1.0)
+
+        return suggestion
 
     def _test_filter_first_exploration(
         self, filter_first_exploration: float