Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/118774.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 118774
summary: Apply default k for knn query eagerly
area: Vector Search
type: bug
issues: []
2 changes: 1 addition & 1 deletion docs/reference/query-dsl/knn-query.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=knn-query-vector-builde
--
(Optional, integer) The number of nearest neighbors to return from each shard.
{es} collects `k` results from each shard, then merges them to find the global top results.
This value must be less than or equal to `num_candidates`. Defaults to `num_candidates`.
This value must be less than or equal to `num_candidates`. Defaults to the search request size.
--

`num_candidates`::
Expand Down
16 changes: 16 additions & 0 deletions rest-api-spec/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -259,4 +259,20 @@ tasks.named("yamlRestTestV7CompatTransform").configure({ task ->
task.skipTest("search.vectors/42_knn_search_bbq_flat/Test knn search", "Scoring has changed in latest versions")
task.skipTest("synonyms/90_synonyms_reloading_for_synset/Reload analyzers for specific synonym set", "Can't work until auto-expand replicas is 0-1 for synonyms index")
task.skipTest("search/90_search_after/_shard_doc sort", "restriction has been lifted in latest versions")
task.skipTest("search.vectors/180_update_dense_vector_type/Test create and update dense vector mapping with bulk indexing", "waiting for #118774 backport")
task.skipTest("search.vectors/160_knn_query_missing_params/kNN query in a bool clause - missing num_candidates", "waiting for #118774 backport")
task.skipTest("search.vectors/110_knn_query_with_filter/Simple knn query", "waiting for #118774 backport")
task.skipTest("search.vectors/160_knn_query_missing_params/kNN search used in nested field - missing num_candidates", "waiting for #118774 backport")
task.skipTest("search.vectors/180_update_dense_vector_type/Test create and update dense vector mapping to int4 with per-doc indexing and flush", "waiting for #118774 backport")
task.skipTest("search.vectors/110_knn_query_with_filter/PRE_FILTER: knn query with internal filter as pre-filter", "waiting for #118774 backport")
task.skipTest("search.vectors/180_update_dense_vector_type/Index, update and merge", "waiting for #118774 backport")
task.skipTest("search.vectors/160_knn_query_missing_params/kNN query with missing num_candidates param - size provided", "waiting for #118774 backport")
task.skipTest("search.vectors/110_knn_query_with_filter/POST_FILTER: knn query with filter from a parent bool query as post-filter", "waiting for #118774 backport")
task.skipTest("search.vectors/120_knn_query_multiple_shards/Aggregations with collected number of docs depends on num_candidates", "waiting for #118774 backport")
task.skipTest("search.vectors/180_update_dense_vector_type/Test create and update dense vector mapping with per-doc indexing and flush", "waiting for #118774 backport")
task.skipTest("search.vectors/110_knn_query_with_filter/PRE_FILTER: knn query with alias filter as pre-filter", "waiting for #118774 backport")
task.skipTest("search.vectors/140_knn_query_with_other_queries/Function score query with knn query", "waiting for #118774 backport")
task.skipTest("search.vectors/130_knn_query_nested_search/nested kNN search inner_hits size > 1", "waiting for #118774 backport")
task.skipTest("search.vectors/110_knn_query_with_filter/PRE_FILTER: pre-filter across multiple aliases", "waiting for #118774 backport")
task.skipTest("search.vectors/160_knn_query_missing_params/kNN search in a dis_max query - missing num_candidates", "waiting for #118774 backport")
})
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ setup:

---
"Simple knn query":

- requires:
cluster_features: "search.vectors.k_param_supported"
reason: 'k param for knn as query is required'
- do:
search:
index: my_index
Expand All @@ -71,8 +73,9 @@ setup:
field: my_vector
query_vector: [1, 1, 1, 1]
num_candidates: 5
k: 5

- match: { hits.total.value: 5 } # collector sees num_candidates docs
- match: { hits.total.value: 5 }
- length: {hits.hits: 3}
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.0.fields.my_name.0: v1 }
Expand All @@ -93,8 +96,9 @@ setup:
field: my_vector
query_vector: [1, 1, 1, 1]
num_candidates: 5
k: 5

- match: { hits.total.value: 5 } # collector sees num_candidates docs
- match: { hits.total.value: 5 }
- length: {hits.hits: 3}
- match: { hits.hits.0._id: "2" }
- match: { hits.hits.0.fields.my_name.0: v2 }
Expand Down Expand Up @@ -140,6 +144,7 @@ setup:
field: my_vector
query_vector: [1, 1, 1, 1]
num_candidates: 5
k: 5

- match: { hits.total.value: 5 }
- length: { hits.hits: 3 }
Expand Down Expand Up @@ -184,6 +189,7 @@ setup:
field: my_vector
query_vector: [1, 1, 1, 1]
num_candidates: 100
k: 100

- match: { hits.total.value: 10 } # 5 docs from each alias
- length: {hits.hits: 6}
Expand Down Expand Up @@ -213,6 +219,7 @@ setup:
field: my_vector
query_vector: [1, 1, 1, 1]
num_candidates: 5
k: 5
filter:
term:
my_name: v2
Expand Down Expand Up @@ -243,9 +250,10 @@ setup:
field: my_vector
query_vector: [1, 1, 1, 1]
num_candidates: 5
k: 5

- match: { hits.total.value: 2 }
- length: {hits.hits: 2} # knn query returns top 5 docs, but they are post-filtered to 2 docs
- length: {hits.hits: 2} # knn query returns top 5 docs (k: 5), but they are post-filtered to 2 docs
- match: { hits.hits.0._id: "2" }
- match: { hits.hits.0.fields.my_name.0: v2 }
- match: { hits.hits.1._id: "4" }
Expand All @@ -271,4 +279,4 @@ setup:
my_name: v1

- match: { hits.total.value: 0}
- length: { hits.hits: 0 } # knn query returns top 5 docs, but they are post-filtered to 0 docs
- length: { hits.hits: 0 } # knn query returns top 3 docs, but they are post-filtered to 0 docs
Original file line number Diff line number Diff line change
Expand Up @@ -166,55 +166,3 @@ setup:
- close_to: { hits.hits.2._score: { value: 120, error: 0.00001 } }
- close_to: { hits.hits.2.matched_queries.bm25_query: { value: 100.0, error: 0.00001 } }
- close_to: { hits.hits.2.matched_queries.knn_query: { value: 20.0, error: 0.00001 } }

---
"Aggregations with collected number of docs depends on num_candidates":
- do:
search:
index: my_index
body:
size: 2
query:
knn:
field: my_vector
query_vector: [1, 1, 1, 1]
num_candidates: 100 # collect up to 100 candidates from each shard
aggs:
my_agg:
terms:
field: my_name
order:
_key: asc

- length: {hits.hits: 2}
- match: {hits.total.value: 12}
- match: {aggregations.my_agg.buckets.0.key: 'v1'}
- match: {aggregations.my_agg.buckets.1.key: 'v2'}
- match: {aggregations.my_agg.buckets.0.doc_count: 6}
- match: {aggregations.my_agg.buckets.1.doc_count: 6}

- do:
search:
index: my_index
body:
size: 2
query:
knn:
field: my_vector
query_vector: [ 1, 1, 1, 1 ]
num_candidates: 3 # collect 3 candidates from each shard
aggs:
my_agg2:
terms:
field: my_name
order:
_key: asc
my_sum_buckets:
sum_bucket:
buckets_path: "my_agg2>_count"

- length: { hits.hits: 2 }
- match: { hits.total.value: 6 }
- match: { aggregations.my_agg2.buckets.0.key: 'v1' }
- match: { aggregations.my_agg2.buckets.1.key: 'v2' }
- match: { aggregations.my_sum_buckets.value: 6.0 }
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ setup:
knn:
field: nested.vector
query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ]
k: 5
num_candidates: 5
inner_hits: { size: 2, "fields": [ "nested.paragraph_id" ], _source: false }

Expand All @@ -295,6 +296,7 @@ setup:
knn:
field: nested.vector
query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ]
k: 5
num_candidates: 5
inner_hits: { size: 2, "fields": [ "nested.paragraph_id" ], _source: false }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ setup:
field: my_vector
query_vector: [ 1, 1, 1, 1 ]
num_candidates: 5
k: 5
functions:
- filter: { match: { my_name: v1 } }
weight: 10
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,9 @@ setup:
knn:
field: vector
query_vector: [1, 1, 1]
k: 2
size: 1
- match: { hits.total: 2 } # due to num_candidates defined as round(1.5 * size), so we only see 2 results
- match: { hits.total: 2 } # k is explicitly set to 2, so 2 results are collected even though size is 1
- length: { hits.hits: 1 } # one result is only returned though

---
Expand All @@ -117,6 +118,7 @@ setup:
field: vector
query_vector: [-1, -1, -1]
num_candidates: 1
k: 1
size: 10

- match: { hits.total: 1 }
Expand All @@ -137,9 +139,10 @@ setup:
- knn:
field: vector
query_vector: [ 1, 1, 0]
k: 1
size: 1

- match: { hits.total: 2 } # due to num_candidates defined as round(1.5 * size), so we only see 2 results from cat:A
- match: { hits.total: 1 }
- length: { hits.hits: 1 }

---
Expand All @@ -154,6 +157,7 @@ setup:
- knn:
field: vector
query_vector: [1, 1, 0]
k: 2
- match:
category: B
tie_breaker: 0.8
Expand All @@ -175,6 +179,7 @@ setup:
knn:
field: nested.vector
query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ]
k: 2
inner_hits: { size: 1, "fields": [ "nested.paragraph_id" ], _source: false }
size: 1

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ setup:
field: embedding
query_vector: [1, 1, 1, 1]
num_candidates: 10
k: 10

- match: { hits.total.value: 10 }
- length: {hits.hits: 3}
Expand Down Expand Up @@ -215,6 +216,7 @@ setup:
field: embedding
query_vector: [ 1, 1, 1, 1 ]
num_candidates: 20
k: 20

- match: { hits.total.value: 20 }
- length: { hits.hits: 3 }
Expand Down Expand Up @@ -322,6 +324,7 @@ setup:
field: embedding
query_vector: [ 1, 1, 1, 1 ]
num_candidates: 30
k: 30

- match: { hits.total.value: 30 }
- length: { hits.hits: 4 }
Expand Down Expand Up @@ -430,6 +433,7 @@ setup:
field: embedding
query_vector: [ 1, 1, 1, 1 ]
num_candidates: 40
k: 40

- match: { hits.total.value: 40 }
- length: { hits.hits: 5 }
Expand Down Expand Up @@ -499,6 +503,7 @@ setup:
field: embedding
query_vector: [1, 1, 1, 1]
num_candidates: 10
k: 10

- match: { hits.total.value: 10 }
- length: {hits.hits: 3}
Expand Down Expand Up @@ -559,6 +564,7 @@ setup:
field: embedding
query_vector: [ 1, 1, 1, 1 ]
num_candidates: 20
k: 20

- match: { hits.total.value: 20 }
- length: { hits.hits: 3 }
Expand Down Expand Up @@ -620,6 +626,7 @@ setup:
field: embedding
query_vector: [ 1, 1, 1, 1 ]
num_candidates: 30
k: 30

- match: { hits.total.value: 30 }
- length: { hits.hits: 4 }
Expand Down Expand Up @@ -682,6 +689,7 @@ setup:
field: embedding
query_vector: [ 1, 1, 1, 1 ]
num_candidates: 40
k: 40

- match: { hits.total.value: 40 }
- length: { hits.hits: 5 }
Expand Down Expand Up @@ -751,6 +759,7 @@ setup:
field: embedding
query_vector: [ 1, 1, 1, 1 ]
num_candidates: 10
k: 10

- match: { hits.total.value: 10 }
- length: { hits.hits: 3 }
Expand Down Expand Up @@ -791,6 +800,7 @@ setup:
field: embedding
query_vector: [ 1, 1, 1, 1 ]
num_candidates: 10
k: 10

- match: { hits.total.value: 10 }
- length: { hits.hits: 3 }
Expand Down Expand Up @@ -833,6 +843,7 @@ setup:
field: embedding
query_vector: [ 1, 1, 1, 1 ]
num_candidates: 20
k: 20

- match: { hits.total.value: 20 }
- length: { hits.hits: 3 }
Expand Down Expand Up @@ -869,6 +880,7 @@ setup:
field: embedding
query_vector: [ 1, 1, 1, 1 ]
num_candidates: 20
k: 20

- match: { hits.total.value: 20 }
- length: { hits.hits: 3 }
Expand Down Expand Up @@ -911,6 +923,7 @@ setup:
field: embedding
query_vector: [ 1, 1, 1, 1 ]
num_candidates: 30
k: 30

- match: { hits.total.value: 30 }
- length: { hits.hits: 4 }
Expand All @@ -933,6 +946,7 @@ setup:
knn:
field: embedding
query_vector: [ 1, 1, 1, 1 ]
k: 30
num_candidates: 30

- match: { hits.total.value: 30 }
Expand Down Expand Up @@ -1769,6 +1783,7 @@ setup:
field: embedding
query_vector: [1, 1, 1, 1]
num_candidates: 10
k: 10

- match: { hits.total.value: 10 }
- length: {hits.hits: 3}
Expand Down Expand Up @@ -1875,6 +1890,7 @@ setup:
field: embedding
query_vector: [ 1, 1, 1, 1 ]
num_candidates: 20
k: 20

- match: { hits.total.value: 20 }
- length: { hits.hits: 3 }
Expand Down Expand Up @@ -1982,6 +1998,7 @@ setup:
field: embedding
query_vector: [ 1, 1, 1, 1 ]
num_candidates: 30
k: 30

- match: { hits.total.value: 30 }
- length: { hits.hits: 4 }
Expand Down Expand Up @@ -2090,6 +2107,7 @@ setup:
field: embedding
query_vector: [ 1, 1, 1, 1 ]
num_candidates: 40
k: 40

- match: { hits.total.value: 40 }
- length: { hits.hits: 5 }
Expand Down
Loading
Loading