Skip to content

Commit 229ce2d

Browse files
committed
Add YAML tests
1 parent 916ac83 commit 229ce2d

File tree

6 files changed

+471
-0
lines changed

6 files changed

+471
-0
lines changed

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,86 @@ setup:
8787
# here we verify that our last hit is always the worst one
8888
- match: { hits.hits.2._id: "1" }
8989

90+
---
91+
"Vector rescoring has similar ordering as knn, same scoring as exact search for kNN section":
92+
- skip:
93+
features: "headers"
94+
- do:
95+
headers:
96+
Content-Type: application/json
97+
search:
98+
rest_total_hits_as_int: true
99+
index: bbq_hnsw
100+
body:
101+
knn:
102+
field: vector
103+
query_vector: [ 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0]
104+
k: 3
105+
num_candidates: 3
106+
107+
- match: { hits.total: 3 }
108+
- set: { hits.hits.0._id: knn_id0 }
109+
- set: { hits.hits.1._id: knn_id1 }
110+
- set: { hits.hits.2._id: knn_id2 }
111+
- set: { hits.hits.0._score: knn_score0 }
112+
- set: { hits.hits.1._score: knn_score1 }
113+
- set: { hits.hits.2._score: knn_score2 }
114+
115+
# Rescore
116+
- do:
117+
headers:
118+
Content-Type: application/json
119+
search:
120+
rest_total_hits_as_int: true
121+
index: bbq_hnsw
122+
body:
123+
knn:
124+
field: vector
125+
query_vector: [ 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0]
126+
k: 3
127+
num_candidates: 3
128+
rescore:
129+
oversample: 1.5
130+
131+
# Comparing to knn search, we already have changes in ordering and scoring
132+
- match: { hits.hits.0._id: $knn_id1 }
133+
- match: { hits.hits.1._id: $knn_id0 }
134+
- match: { hits.hits.2._id: $knn_id2 }
135+
136+
# Get rescoring scores
137+
- match: { hits.total: 3 }
138+
- set: { hits.hits.0._id: rescore_id0 }
139+
- set: { hits.hits.1._id: rescore_id1 }
140+
- set: { hits.hits.2._id: rescore_id2 }
141+
- set: { hits.hits.0._score: rescore_score0 }
142+
- set: { hits.hits.1._score: rescore_score1 }
143+
- set: { hits.hits.2._score: rescore_score2 }
144+
145+
# Exact knn via script score
146+
- do:
147+
headers:
148+
Content-Type: application/json
149+
search:
150+
rest_total_hits_as_int: true
151+
body:
152+
query:
153+
script_score:
154+
query: {match_all: {} }
155+
script:
156+
source: "1.0 / (1.0 + Math.pow(l2norm(params.query_vector, 'vector'), 2.0))"
157+
params:
158+
query_vector: [ 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0 ]
159+
160+
# Check same ordering (which will not be true for larger datasets)
161+
# and same scoring (which should hold for the elements that are present in both)
162+
- match: { hits.total: 3 }
163+
- match: { hits.hits.0._id: $rescore_id0 }
164+
- match: { hits.hits.1._id: $rescore_id1 }
165+
- match: { hits.hits.2._id: $rescore_id2 }
166+
- match: { hits.hits.0._score: $rescore_score0 }
167+
- match: { hits.hits.1._score: $rescore_score1 }
168+
- match: { hits.hits.2._score: $rescore_score2 }
169+
90170
---
91171
"Test bad quantization parameters":
92172
- do:

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_byte_quantized.yml

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,85 @@ setup:
368368
- match: {hits.hits.2._id: "1"}
369369
- gte: {hits.hits.2._score: 0.78}
370370
- lte: {hits.hits.2._score: 0.791}
371+
372+
---
373+
# Won't be true for larger datasets, but this helps check kNN vs rescoring vs exact search
374+
"Vector rescoring has same ordering as knn, same scoring as exact search for kNN section":
375+
- skip:
376+
features: "headers"
377+
378+
# kNN search
379+
- do:
380+
headers:
381+
Content-Type: application/json
382+
search:
383+
rest_total_hits_as_int: true
384+
index: hnsw_byte_quantized
385+
body:
386+
size: 3
387+
query:
388+
knn:
389+
k: 3
390+
num_candidates: 3
391+
field: vector
392+
query_vector: [0.5, 111.3, -13.0, 14.8, -156.0]
393+
394+
- match: { hits.total: 3 }
395+
- set: { hits.hits.0._id: knn_id0 }
396+
- set: { hits.hits.1._id: knn_id1 }
397+
- set: { hits.hits.2._id: knn_id2 }
398+
399+
# Rescore
400+
- do:
401+
headers:
402+
Content-Type: application/json
403+
search:
404+
rest_total_hits_as_int: true
405+
index: hnsw_byte_quantized
406+
body:
407+
size: 3
408+
query:
409+
knn:
410+
k: 3
411+
num_candidates: 3
412+
field: vector
413+
query_vector: [0.5, 111.3, -13.0, 14.8, -156.0]
414+
rescore:
415+
oversample: 1.5
416+
417+
# Check same ordering (which will not be true for larger datasets)
418+
- match: { hits.total: 3 }
419+
- match: { hits.hits.0._id: $knn_id0 }
420+
- match: { hits.hits.1._id: $knn_id1 }
421+
- match: { hits.hits.2._id: $knn_id2 }
422+
- set: { hits.hits.0._score: rescore_score0 }
423+
- set: { hits.hits.1._score: rescore_score1 }
424+
- set: { hits.hits.2._score: rescore_score2 }
425+
426+
- do:
427+
headers:
428+
Content-Type: application/json
429+
search:
430+
rest_total_hits_as_int: true
431+
body:
432+
query:
433+
script_score:
434+
query: {match_all: {} }
435+
script:
436+
source: "1.0 / (1.0 + Math.pow(l2norm(params.query_vector, 'vector'), 2.0))"
437+
params:
438+
query_vector: [0.5, 111.3, -13.0, 14.8, -156.0]
439+
440+
# Check same ordering (which will not be true for larger datasets)
441+
# and same scoring (which should hold for the elements that are present in both)
442+
- match: { hits.total: 3 }
443+
- match: { hits.hits.0._id: $knn_id0 }
444+
- match: { hits.hits.1._id: $knn_id1 }
445+
- match: { hits.hits.2._id: $knn_id2 }
446+
- match: { hits.hits.0._score: $rescore_score0 }
447+
- match: { hits.hits.1._score: $rescore_score1 }
448+
- match: { hits.hits.2._score: $rescore_score2 }
449+
371450
---
372451
"Test bad quantization parameters":
373452
- do:

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized.yml

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,84 @@ setup:
549549
- match: { hits.hits.1._id: "2"}
550550
- match: { hits.hits.2._id: "3"}
551551
---
552+
"Vector rescoring has same ordering as knn, same scoring as exact search for kNN section":
553+
- skip:
554+
features: "headers"
555+
- do:
556+
headers:
557+
Content-Type: application/json
558+
search:
559+
rest_total_hits_as_int: true
560+
index: hnsw_byte_quantized
561+
body:
562+
fields: [ "name" ]
563+
knn:
564+
field: vector
565+
query_vector: [-0.5, 90.0, -10, 14.8]
566+
k: 3
567+
num_candidates: 3
568+
569+
- match: { hits.total: 3 }
570+
- set: { hits.hits.0._id: knn_id0 }
571+
- set: { hits.hits.1._id: knn_id1 }
572+
- set: { hits.hits.2._id: knn_id2 }
573+
574+
# Rescore
575+
- do:
576+
headers:
577+
Content-Type: application/json
578+
search:
579+
index: hnsw_byte_quantized
580+
rest_total_hits_as_int: true
581+
body:
582+
fields: [ "name" ]
583+
knn:
584+
field: vector
585+
query_vector: [-0.5, 90.0, -10, 14.8]
586+
k: 3
587+
num_candidates: 3
588+
rescore:
589+
oversample: 1.5
590+
591+
# Comparing to knn search
592+
- match: { hits.hits.0._id: $knn_id0 }
593+
- match: { hits.hits.1._id: $knn_id1 }
594+
- match: { hits.hits.2._id: $knn_id2 }
595+
596+
# Get rescoring scores
597+
- match: { hits.total: 3 }
598+
- set: { hits.hits.0._id: rescore_id0 }
599+
- set: { hits.hits.1._id: rescore_id1 }
600+
- set: { hits.hits.2._id: rescore_id2 }
601+
- set: { hits.hits.0._score: rescore_score0 }
602+
- set: { hits.hits.1._score: rescore_score1 }
603+
- set: { hits.hits.2._score: rescore_score2 }
604+
605+
- do:
606+
headers:
607+
Content-Type: application/json
608+
search:
609+
rest_total_hits_as_int: true
610+
body:
611+
query:
612+
script_score:
613+
query: {match_all: {} }
614+
script:
615+
source: "1.0 / (1.0 + Math.pow(l2norm(params.query_vector, 'vector'), 2.0))"
616+
params:
617+
query_vector: [-0.5, 90.0, -10, 14.8]
618+
619+
# Check same ordering (which will not be true for larger datasets)
620+
# and same scoring (which should hold for the elements that are present in both)
621+
- match: { hits.total: 3 }
622+
- match: { hits.hits.0._id: $rescore_id0 }
623+
- match: { hits.hits.1._id: $rescore_id1 }
624+
- match: { hits.hits.2._id: $rescore_id2 }
625+
- match: { hits.hits.0._score: $rescore_score0 }
626+
- match: { hits.hits.1._score: $rescore_score1 }
627+
- match: { hits.hits.2._score: $rescore_score2 }
628+
629+
---
552630
"Test odd dimensions fail indexing":
553631
- do:
554632
catch: bad_request

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_bbq_flat.yml

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,86 @@ setup:
8787
# here we verify that our last hit is always the worst one
8888
- match: { hits.hits.2._id: "1" }
8989
---
90+
"Vector rescoring has similar ordering as knn, same scoring as exact search for kNN section":
91+
- skip:
92+
features: "headers"
93+
- do:
94+
headers:
95+
Content-Type: application/json
96+
search:
97+
rest_total_hits_as_int: true
98+
index: bbq_flat
99+
body:
100+
knn:
101+
field: vector
102+
query_vector: [ 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0]
103+
k: 3
104+
num_candidates: 3
105+
106+
- match: { hits.total: 3 }
107+
- set: { hits.hits.0._id: knn_id0 }
108+
- set: { hits.hits.1._id: knn_id1 }
109+
- set: { hits.hits.2._id: knn_id2 }
110+
- set: { hits.hits.0._score: knn_score0 }
111+
- set: { hits.hits.1._score: knn_score1 }
112+
- set: { hits.hits.2._score: knn_score2 }
113+
114+
# Rescore
115+
- do:
116+
headers:
117+
Content-Type: application/json
118+
search:
119+
rest_total_hits_as_int: true
120+
index: bbq_flat
121+
body:
122+
knn:
123+
field: vector
124+
query_vector: [ 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0]
125+
k: 3
126+
num_candidates: 3
127+
rescore:
128+
oversample: 1.5
129+
130+
# Comparing to knn search, we already have changes in ordering and scoring
131+
- match: { hits.hits.0._id: $knn_id1 }
132+
- match: { hits.hits.1._id: $knn_id0 }
133+
- match: { hits.hits.2._id: $knn_id2 }
134+
135+
# Get rescoring scores
136+
- match: { hits.total: 3 }
137+
- set: { hits.hits.0._id: rescore_id0 }
138+
- set: { hits.hits.1._id: rescore_id1 }
139+
- set: { hits.hits.2._id: rescore_id2 }
140+
- set: { hits.hits.0._score: rescore_score0 }
141+
- set: { hits.hits.1._score: rescore_score1 }
142+
- set: { hits.hits.2._score: rescore_score2 }
143+
144+
# Exact knn via script score
145+
- do:
146+
headers:
147+
Content-Type: application/json
148+
search:
149+
rest_total_hits_as_int: true
150+
body:
151+
query:
152+
script_score:
153+
query: {match_all: {} }
154+
script:
155+
source: "1.0 / (1.0 + Math.pow(l2norm(params.query_vector, 'vector'), 2.0))"
156+
params:
157+
query_vector: [ 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0]
158+
159+
# Check same ordering (which will not be true for larger datasets)
160+
# and same scoring (which should hold for the elements that are present in both)
161+
- match: { hits.total: 3 }
162+
- match: { hits.hits.0._id: $rescore_id0 }
163+
- match: { hits.hits.1._id: $rescore_id1 }
164+
- match: { hits.hits.2._id: $rescore_id2 }
165+
- match: { hits.hits.0._score: $rescore_score0 }
166+
- match: { hits.hits.1._score: $rescore_score1 }
167+
- match: { hits.hits.2._score: $rescore_score2 }
168+
169+
---
90170
"Test bad parameters":
91171
- do:
92172
catch: bad_request

0 commit comments

Comments
 (0)