Skip to content

Commit 5d73081

Browse files
committed
Add unit tests for multiple searchable copies
1 parent d31cf09 commit 5d73081

File tree

1 file changed

+108
-5
lines changed

1 file changed

+108
-5
lines changed

tests/unit/test_evaluator.py

Lines changed: 108 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3221,7 +3221,28 @@ class MockVespaApp:
32213221
def query_many(self, queries, max_concurrent=100, **kwargs):
    """Return a single canned successful response; all arguments are ignored."""
    canned_response = SuccessfullMockVespaResponse()
    return [canned_response]
32233223

3224+
class SuccessfullMockTwoSCVespaResponse(SuccessfullMockVespaResponse):
    """Mock response whose trace reports a global-filter upper limit of 0.498.

    The tests use it (via ``MockTwoSCVespaApp``) as the response for which
    ``get_searchable_copies()`` is expected to return 2.
    """

    def __init__(self, hits=None, _total_count=None, _timing=None, _status_code=200):
        # ``None`` replaces the former mutable ``[]`` default so that no list
        # object is shared between instances; callers passing a list are
        # unaffected.
        super().__init__(
            [] if hits is None else hits, _total_count, _timing, _status_code
        )

    def get_json(self):
        """Return the parent's JSON with the global-filter upper limit overridden."""
        super_trace = super().get_json()
        global_filter = super_trace["trace"]["children"][0]["[0]"]["global_filter"]
        # NOTE(review): the original stored 0.489 and then 0.498 under this same
        # "upper_limit" key, so only 0.498 ever took effect. Confirm whether the
        # first store was meant for a different key or trace node.
        global_filter["upper_limit"] = 0.498
        return super_trace
3239+
3240+
class MockTwoSCVespaApp:
    """Stand-in Vespa app whose ``query_many`` yields one two-SC mock response."""

    def query_many(self, queries, max_concurrent=100, **kwargs):
        # Arguments are accepted only to match the call signature; none is used.
        response = SuccessfullMockTwoSCVespaResponse()
        return [response]
3243+
32243244
self.mock_app = MockVespaApp()
3245+
self.two_sc_mock_app = MockTwoSCVespaApp()
32253246

32263247
def test_run(self):
32273248
hitratio_evaluator = VespaNNGlobalFilterHitratioEvaluator(
@@ -3233,6 +3254,19 @@ def test_run(self):
32333254
self.assertAlmostEqual(hitratios[0][0], 0.01, delta=0.001)
32343255
self.assertAlmostEqual(hitratios[0][1], 0.02, delta=0.001)
32353256

3257+
def test_get_searchable_copies(self):
    """Check the searchable-copy count reported after a run for both mock apps."""
    # (mock application, expected searchable copies), checked in this order.
    expectations = (
        (self.mock_app, 1),
        (self.two_sc_mock_app, 2),
    )
    for app, expected_copies in expectations:
        evaluator = VespaNNGlobalFilterHitratioEvaluator(
            [{"yql": "foo"}], app, verify_target_hits=100
        )
        evaluator.run()
        self.assertEqual(evaluator.get_searchable_copies(), expected_copies)
3269+
32363270

32373271
class TestVespaNNRecallEvaluator(unittest.TestCase):
32383272
"""Test the VespaNNRecallEvaluator class."""
@@ -3400,6 +3434,10 @@ def query_many(self, queries, max_concurrent=100, **kwargs):
34003434
self.mock_app, [], 100, buckets_per_percent=1
34013435
) # 100 buckets
34023436

3437+
self.two_sc_optimizer = VespaNNParameterOptimizer(
3438+
self.mock_app, [], 100, buckets_per_percent=2
3439+
) # 200 buckets
3440+
34033441
# Percentages: 1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99
34043442
self.buckets = [2, 10, 20, 40, 60, 80, 100, 120, 140, 160, 180, 190, 198]
34053443
self.num = len(self.buckets)
@@ -3422,6 +3460,10 @@ def query_many(self, queries, max_concurrent=100, **kwargs):
34223460
]
34233461
self.optimizer.distribute_to_buckets(self.queries_with_hitratios)
34243462
self.optimizerOneBucket.distribute_to_buckets(self.queries_with_hitratios)
3463+
self.two_sc_optimizer.distribute_to_buckets(
3464+
list(map(lambda x: (x[0], x[1] / 2), self.queries_with_hitratios))
3465+
)
3466+
self.two_sc_optimizer.searchable_copies = 2
34253467

34263468
def test_get_bucket_interval_width(self):
34273469
self.assertAlmostEqual(
@@ -3597,17 +3639,16 @@ def test_get_query_distribution(self):
35973639

35983640
def _assert_post_filter_threshold(
35993641
self,
3642+
optimizer,
3643+
buckets,
36003644
response_times_post_filtering,
36013645
recall_post_filtering,
36023646
response_times_pre_filtering,
36033647
recall_pre_filtering,
36043648
lower,
36053649
upper,
36063650
):
3607-
buckets = self.buckets
3608-
filtered_out_ratios = [
3609-
self.optimizer.bucket_to_filtered_out(b) for b in buckets
3610-
]
3651+
filtered_out_ratios = [optimizer.bucket_to_filtered_out(b) for b in buckets]
36113652

36123653
benchmark_post_filtering = BucketedMetricResults(
36133654
metric_name="searchtime",
@@ -3635,7 +3676,7 @@ def _assert_post_filter_threshold(
36353676
filtered_out_ratios=filtered_out_ratios,
36363677
)
36373678

3638-
post_filter_threshold = self.optimizer._suggest_post_filter_threshold(
3679+
post_filter_threshold = optimizer._suggest_post_filter_threshold(
36393680
benchmark_post_filtering,
36403681
recall_post_filtering,
36413682
benchmark_pre_filtering,
@@ -3647,6 +3688,8 @@ def _assert_post_filter_threshold(
36473688
def test_suggest_post_filter_threshold(self):
36483689
# Should be somewhere between 40 and 50 percent
36493690
self._assert_post_filter_threshold(
3691+
self.optimizer,
3692+
self.buckets,
36503693
[5.0, 5.0, 5.0, 10.0, 10.0, 10.0, 15.0, 15.0, 15.0, 20.0, 20.0, 20.0, 25.0],
36513694
[0.80] * self.num,
36523695
[13.0] * self.num,
@@ -3657,6 +3700,8 @@ def test_suggest_post_filter_threshold(self):
36573700

36583701
# Should switch earlier since recall becomes bad
36593702
self._assert_post_filter_threshold(
3703+
self.optimizer,
3704+
self.buckets,
36603705
[5.0, 5.0, 5.0, 10.0, 10.0, 10.0, 15.0, 15.0, 15.0, 20.0, 20.0, 20.0, 25.0],
36613706
[
36623707
0.80,
@@ -3681,6 +3726,8 @@ def test_suggest_post_filter_threshold(self):
36813726

36823727
# Should not switch since recall too bad
36833728
self._assert_post_filter_threshold(
3729+
self.optimizer,
3730+
self.buckets,
36843731
[5.0, 5.0, 5.0, 10.0, 10.0, 10.0, 15.0, 15.0, 15.0, 20.0, 20.0, 20.0, 25.0],
36853732
[0.70] * self.num,
36863733
[13.0] * self.num,
@@ -3691,6 +3738,8 @@ def test_suggest_post_filter_threshold(self):
36913738

36923739
# Should not switch since response time bad
36933740
self._assert_post_filter_threshold(
3741+
self.optimizer,
3742+
self.buckets,
36943743
[25.0] * self.num,
36953744
[0.80] * self.num,
36963745
[13.0] * self.num,
@@ -3699,6 +3748,60 @@ def test_suggest_post_filter_threshold(self):
36993748
0.001,
37003749
)
37013750

3751+
def test_suggest_post_filter_threshold_with_two_searchable_copies(self):
    """Exercise the post-filter threshold suggestion on the two-copy optimizer."""
    optimizer = self.two_sc_optimizer
    two_sc_buckets = optimizer.get_non_empty_buckets()
    two_sc_num = len(two_sc_buckets)

    def flat(value):
        # Build a fresh constant list, one entry per non-empty bucket.
        return [value] * two_sc_num

    def rising_post_filter_times():
        # Step-wise increasing response times: 13 fixed entries.
        return [5.0] * 3 + [10.0] * 3 + [15.0] * 3 + [20.0] * 3 + [25.0]

    # Do not use post filtering
    self._assert_post_filter_threshold(
        optimizer,
        two_sc_buckets,
        response_times_post_filtering=flat(25.0),
        recall_post_filtering=flat(0.80),
        response_times_pre_filtering=flat(13.0),
        recall_pre_filtering=flat(0.80),
        lower=0.0,
        upper=0.001,
    )

    # Should be somewhere between 40 and 50 percent
    self._assert_post_filter_threshold(
        optimizer,
        two_sc_buckets,
        response_times_post_filtering=rising_post_filter_times(),
        recall_post_filtering=flat(0.80),
        response_times_pre_filtering=flat(13.0),
        recall_pre_filtering=flat(0.80),
        lower=0.40,
        upper=0.50,
    )

    # Should switch earlier since recall becomes bad
    self._assert_post_filter_threshold(
        optimizer,
        two_sc_buckets,
        response_times_post_filtering=rising_post_filter_times(),
        # Recall drops from 0.80 to 0.70 after the first two entries.
        recall_post_filtering=[0.80] * 2 + [0.70] * 11,
        response_times_pre_filtering=flat(13),
        recall_pre_filtering=flat(0.80),
        lower=0.05,
        upper=0.10,
    )
3804+
37023805
def _assert_approximate_threshold(
37033806
self,
37043807
response_times_exact,

0 commit comments

Comments
 (0)