Skip to content

Commit c47faa4

Browse files
authored
Merge pull request #1237 from vespa-engine/boeker/handle-searchable-copies
Handle multiple searchable copies in postFilterThreshold suggestion
2 parents 61512df + e047063 commit c47faa4

File tree

2 files changed

+137
-6
lines changed

2 files changed

+137
-6
lines changed

tests/unit/test_evaluator.py

Lines changed: 108 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3221,7 +3221,28 @@ class MockVespaApp:
32213221
def query_many(self, queries, max_concurrent=100, **kwargs):
32223222
return [SuccessfullMockVespaResponse()]
32233223

3224+
class SuccessfullMockTwoSCVespaResponse(SuccessfullMockVespaResponse):
    """Mock response whose trace reports global-filter upper limits close to 0.5.

    Used to simulate an application with two searchable copies: the
    hit-ratio evaluator derives the copy count as
    ``round(1.0 / upper_limit)``, and both values set here round to 2.
    """

    def __init__(self, hits=None, _total_count=None, _timing=None, _status_code=200):
        """Forward all arguments to the parent mock response.

        Args:
            hits: Hits to forward; ``None`` (the default) forwards a new
                empty list.
            _total_count: Forwarded unchanged to the parent.
            _timing: Forwarded unchanged to the parent.
            _status_code: Forwarded unchanged to the parent.
        """
        # A fresh list per call avoids sharing one mutable default object
        # between every instance created without explicit hits.
        super().__init__(
            [] if hits is None else hits, _total_count, _timing, _status_code
        )

    def get_json(self):
        """Return the parent's JSON with both ``upper_limit`` entries overridden.

        Returns:
            The object returned by the parent's ``get_json()``, modified in
            place.
        """
        trace_json = super().get_json()
        first_child = trace_json["trace"]["children"][0]
        # Deliberately not exactly 0.5: one value slightly below and one
        # slightly above, so rounding of 1 / upper_limit is exercised.
        for entry_key, upper_limit in (("[0]", 0.489), ("[1]", 0.51)):
            first_child[entry_key]["global_filter"]["upper_limit"] = upper_limit
        return trace_json
3239+
3240+
class MockTwoSCVespaApp:
    """Stand-in application whose ``query_many`` yields one two-copy mock response."""

    def query_many(self, queries, max_concurrent=100, **kwargs):
        """Return a single-element list holding a fresh mock response.

        All arguments are accepted and ignored.
        """
        response = SuccessfullMockTwoSCVespaResponse()
        return [response]
3243+
32243244
self.mock_app = MockVespaApp()
3245+
self.two_sc_mock_app = MockTwoSCVespaApp()
32253246

32263247
def test_run(self):
32273248
hitratio_evaluator = VespaNNGlobalFilterHitratioEvaluator(
@@ -3233,6 +3254,19 @@ def test_run(self):
32333254
self.assertAlmostEqual(hitratios[0][0], 0.01, delta=0.001)
32343255
self.assertAlmostEqual(hitratios[0][1], 0.02, delta=0.001)
32353256

3257+
def test_get_searchable_copies(self):
    """Check the searchable-copy count reported after a run for each mock app."""
    # (attribute holding the mock app, expected number of searchable copies)
    scenarios = (("mock_app", 1), ("two_sc_mock_app", 2))
    for app_attribute, expected_copies in scenarios:
        evaluator = VespaNNGlobalFilterHitratioEvaluator(
            [{"yql": "foo"}], getattr(self, app_attribute), verify_target_hits=100
        )
        evaluator.run()
        self.assertEqual(evaluator.get_searchable_copies(), expected_copies)
3269+
32363270

32373271
class TestVespaNNRecallEvaluator(unittest.TestCase):
32383272
"""Test the VespaNNRecallEvaluator class."""
@@ -3475,6 +3509,10 @@ def query_many(self, queries, max_concurrent=100, **kwargs):
34753509
self.mock_app, [], 100, buckets_per_percent=1
34763510
) # 100 buckets
34773511

3512+
self.two_sc_optimizer = VespaNNParameterOptimizer(
3513+
self.mock_app, [], 100, buckets_per_percent=2
3514+
) # 200 buckets
3515+
34783516
# Percentages: 1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99
34793517
self.buckets = [2, 10, 20, 40, 60, 80, 100, 120, 140, 160, 180, 190, 198]
34803518
self.num = len(self.buckets)
@@ -3497,6 +3535,10 @@ def query_many(self, queries, max_concurrent=100, **kwargs):
34973535
]
34983536
self.optimizer.distribute_to_buckets(self.queries_with_hitratios)
34993537
self.optimizerOneBucket.distribute_to_buckets(self.queries_with_hitratios)
3538+
self.two_sc_optimizer.distribute_to_buckets(
3539+
list(map(lambda x: (x[0], x[1] / 2), self.queries_with_hitratios))
3540+
)
3541+
self.two_sc_optimizer.searchable_copies = 2
35003542

35013543
def test_get_bucket_interval_width(self):
35023544
self.assertAlmostEqual(
@@ -3672,17 +3714,16 @@ def test_get_query_distribution(self):
36723714

36733715
def _assert_post_filter_threshold(
36743716
self,
3717+
optimizer,
3718+
buckets,
36753719
response_times_post_filtering,
36763720
recall_post_filtering,
36773721
response_times_pre_filtering,
36783722
recall_pre_filtering,
36793723
lower,
36803724
upper,
36813725
):
3682-
buckets = self.buckets
3683-
filtered_out_ratios = [
3684-
self.optimizer.bucket_to_filtered_out(b) for b in buckets
3685-
]
3726+
filtered_out_ratios = [optimizer.bucket_to_filtered_out(b) for b in buckets]
36863727

36873728
benchmark_post_filtering = BucketedMetricResults(
36883729
metric_name="searchtime",
@@ -3710,7 +3751,7 @@ def _assert_post_filter_threshold(
37103751
filtered_out_ratios=filtered_out_ratios,
37113752
)
37123753

3713-
post_filter_threshold = self.optimizer._suggest_post_filter_threshold(
3754+
post_filter_threshold = optimizer._suggest_post_filter_threshold(
37143755
benchmark_post_filtering,
37153756
recall_post_filtering,
37163757
benchmark_pre_filtering,
@@ -3722,6 +3763,8 @@ def _assert_post_filter_threshold(
37223763
def test_suggest_post_filter_threshold(self):
37233764
# Should be somewhere between 40 and 50 percent
37243765
self._assert_post_filter_threshold(
3766+
self.optimizer,
3767+
self.buckets,
37253768
[5.0, 5.0, 5.0, 10.0, 10.0, 10.0, 15.0, 15.0, 15.0, 20.0, 20.0, 20.0, 25.0],
37263769
[0.80] * self.num,
37273770
[13.0] * self.num,
@@ -3732,6 +3775,8 @@ def test_suggest_post_filter_threshold(self):
37323775

37333776
# Should switch earlier since recall becomes bad
37343777
self._assert_post_filter_threshold(
3778+
self.optimizer,
3779+
self.buckets,
37353780
[5.0, 5.0, 5.0, 10.0, 10.0, 10.0, 15.0, 15.0, 15.0, 20.0, 20.0, 20.0, 25.0],
37363781
[
37373782
0.80,
@@ -3756,6 +3801,8 @@ def test_suggest_post_filter_threshold(self):
37563801

37573802
# Should not switch since recall too bad
37583803
self._assert_post_filter_threshold(
3804+
self.optimizer,
3805+
self.buckets,
37593806
[5.0, 5.0, 5.0, 10.0, 10.0, 10.0, 15.0, 15.0, 15.0, 20.0, 20.0, 20.0, 25.0],
37603807
[0.70] * self.num,
37613808
[13.0] * self.num,
@@ -3766,6 +3813,8 @@ def test_suggest_post_filter_threshold(self):
37663813

37673814
# Should not switch since response time bad
37683815
self._assert_post_filter_threshold(
3816+
self.optimizer,
3817+
self.buckets,
37693818
[25.0] * self.num,
37703819
[0.80] * self.num,
37713820
[13.0] * self.num,
@@ -3774,6 +3823,60 @@ def test_suggest_post_filter_threshold(self):
37743823
0.001,
37753824
)
37763825

3826+
def test_suggest_post_filter_threshold_with_two_searchable_copies(self):
    """Exercise the post-filter threshold suggestion on the two-copy optimizer."""
    optimizer = self.two_sc_optimizer
    buckets = optimizer.get_non_empty_buckets()
    bucket_count = len(buckets)

    def constant(value):
        # One identical entry per non-empty bucket; a new list on every call.
        return [value] * bucket_count

    def rising_response_times():
        # Post-filtering response times that grow from 5.0 to 25.0.
        return [5.0] * 3 + [10.0] * 3 + [15.0] * 3 + [20.0] * 3 + [25.0]

    # Do not use post filtering
    self._assert_post_filter_threshold(
        optimizer,
        buckets,
        constant(25.0),
        constant(0.80),
        constant(13.0),
        constant(0.80),
        0.0,
        0.001,
    )

    # Should be somewhere between 40 and 50 percent
    self._assert_post_filter_threshold(
        optimizer,
        buckets,
        rising_response_times(),
        constant(0.80),
        constant(13.0),
        constant(0.80),
        0.40,
        0.50,
    )

    # Should switch earlier since recall becomes bad
    self._assert_post_filter_threshold(
        optimizer,
        buckets,
        rising_response_times(),
        [0.80] * 2 + [0.70] * 11,
        constant(13),
        constant(0.80),
        0.05,
        0.10,
    )
3879+
37773880
def _assert_approximate_threshold(
37783881
self,
37793882
response_times_exact,

vespa/evaluation/_base.py

Lines changed: 29 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1780,6 +1780,7 @@ def __init__(
17801780
self.queries = queries
17811781
self.app = app
17821782
self.verify_target_hits = verify_target_hits
1783+
self.searchable_copies = None
17831784

17841785
def run(self):
17851786
"""
@@ -1834,6 +1835,16 @@ def extract_from_trace(obj: dict, type_name: str):
18341835
and blueprint["global_filter"]["calculated"]
18351836
):
18361837
hit_ratios.append(blueprint["global_filter"]["hit_ratio"])
1838+
actual_upper_limit = blueprint["global_filter"]["upper_limit"]
1839+
if actual_upper_limit is not None and actual_upper_limit > 0.0:
1840+
searchable_copies = round(1.0 / actual_upper_limit)
1841+
if self.searchable_copies is None:
1842+
self.searchable_copies = searchable_copies
1843+
else:
1844+
if self.searchable_copies != searchable_copies:
1845+
print(
1846+
f"Searchable copies mismatch: {searchable_copies} vs. {self.searchable_copies} found earlier"
1847+
)
18371848

18381849
if (
18391850
self.verify_target_hits is not None
@@ -1847,6 +1858,15 @@ def extract_from_trace(obj: dict, type_name: str):
18471858

18481859
return all_hit_ratios
18491860

1861+
def get_searchable_copies(self) -> int | None:
1862+
"""
1863+
Returns number of searchable copies determined during hit-ratio computation.
1864+
1865+
Returns:
1866+
int: Number of searchable copies used by Vespa application.
1867+
"""
1868+
return self.searchable_copies
1869+
18501870

18511871
class VespaNNRecallEvaluator:
18521872
"""
@@ -2188,6 +2208,8 @@ def __init__(
21882208
self.max_concurrent = max_concurrent
21892209
self.id_field = id_field
21902210

2211+
self.searchable_copies = None
2212+
21912213
def get_bucket_interval_width(self) -> float:
21922214
"""
21932215
Gets the width of the interval represented by a single bucket.
@@ -2336,6 +2358,7 @@ def determine_hit_ratios_and_distribute_to_buckets(
23362358
queries, self.app, verify_target_hits=self.hits
23372359
)
23382360
hitratio_list = hitratio_evaluator.run()
2361+
self.searchable_copies = hitratio_evaluator.get_searchable_copies()
23392362

23402363
for i in range(0, len(hitratio_list)):
23412364
hitratios = hitratio_list[i]
@@ -2890,7 +2913,12 @@ def _suggest_post_filter_threshold(
28902913
threshold = i
28912914
response_time_gain = current_gain
28922915

2893-
return self.bucket_to_hitratio(threshold)
2916+
suggestion = self.bucket_to_hitratio(threshold)
2917+
if self.searchable_copies is not None:
2918+
suggestion = suggestion * self.searchable_copies
2919+
suggestion = min(suggestion, 1.0)
2920+
2921+
return suggestion
28942922

28952923
def _test_filter_first_exploration(
28962924
self, filter_first_exploration: float

0 commit comments

Comments
 (0)