From ba997cb704a4a3fdcf8d9f0ac8610d869d16a729 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Wed, 29 Jan 2025 09:58:08 -0500 Subject: [PATCH] Mark bbq indices as GA and add rolling upgrade integration tests (#121105) With the introduction of our new backing algorithm and making rescoring easier with the `rescore_vector` API, let's mark bbq as GA. Additionally, this commit adds rolling upgrade tests to ensure stability. --- docs/changelog/121105.yaml | 5 + .../mapping/types/dense-vector.asciidoc | 10 +- .../upgrades/VectorSearchIT.java | 179 ++++++++++++++++++ 3 files changed, 189 insertions(+), 5 deletions(-) create mode 100644 docs/changelog/121105.yaml diff --git a/docs/changelog/121105.yaml b/docs/changelog/121105.yaml new file mode 100644 index 0000000000000..925d3a036e5c2 --- /dev/null +++ b/docs/changelog/121105.yaml @@ -0,0 +1,5 @@ +pr: 121105 +summary: Mark bbq indices as GA and add rolling upgrade integration tests +area: Vector Search +type: feature +issues: [] diff --git a/docs/reference/mapping/types/dense-vector.asciidoc b/docs/reference/mapping/types/dense-vector.asciidoc index 5492c953dcac6..7afa4af1f5b9e 100644 --- a/docs/reference/mapping/types/dense-vector.asciidoc +++ b/docs/reference/mapping/types/dense-vector.asciidoc @@ -118,7 +118,7 @@ The three following quantization strategies are supported: * `int8` - Quantizes each dimension of the vector to 1-byte integers. This reduces the memory footprint by 75% (or 4x) at the cost of some accuracy. * `int4` - Quantizes each dimension of the vector to half-byte integers. This reduces the memory footprint by 87% (or 8x) at the cost of accuracy. -* `bbq` - experimental:[] Better binary quantization which reduces each dimension to a single bit precision. This reduces the memory footprint by 96% (or 32x) at a larger cost of accuracy. Generally, oversampling during query time and reranking can help mitigate the accuracy loss. +* `bbq` - Better binary quantization which reduces each dimension to a single bit precision. This reduces the memory footprint by 96% (or 32x) at a larger cost of accuracy. Generally, oversampling during query time and reranking can help mitigate the accuracy loss. When using a quantized format, you may want to oversample and rescore the results to improve accuracy. See <> for more information. @@ -133,7 +133,7 @@ This means disk usage will increase by ~25% for `int8`, ~12.5% for `int4`, and ~ NOTE: `int4` quantization requires an even number of vector dimensions. -NOTE: experimental:[] `bbq` quantization only supports vector dimensions that are greater than 64. +NOTE: `bbq` quantization only supports vector dimensions that are greater than 64. Here is an example of how to create a byte-quantized index: @@ -177,7 +177,7 @@ PUT my-byte-quantized-index } -------------------------------------------------- -experimental:[] Here is an example of how to create a binary quantized index: +Here is an example of how to create a binary quantized index: [source,console] -------------------------------------------------- @@ -325,7 +325,7 @@ by 4x at the cost of some accuracy. See <>. -* experimental:[] `bbq_hnsw` - This utilizes the https://arxiv.org/abs/1603.09320[HNSW algorithm] in addition to automatically binary +* `bbq_hnsw` - This utilizes the https://arxiv.org/abs/1603.09320[HNSW algorithm] in addition to automatically binary quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint by 32x at the cost of accuracy. See <>. * `flat` - This utilizes a brute-force search algorithm for exact kNN search. This supports all `element_type` values. @@ -333,7 +333,7 @@ by 32x at the cost of accuracy. See < response = search(searchRequest); + assertThat(extractValue(response, "hits.total.value"), equalTo(7)); + List> hits = extractValue(response, "hits.hits"); + assertThat(hits.get(0).get("_id"), equalTo("0")); + assertThat((double) hits.get(0).get("_score"), closeTo(1.9869276, 0.0001)); + + // search with knn + searchRequest = new Request("POST", "/" + BBQ_INDEX_NAME + "/_search"); + searchRequest.setJsonEntity(""" + { + "knn": { + "field": "vector", + "query_vector": [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6], + "k": 2, + "num_candidates": 5 + } + } + """); + response = search(searchRequest); + assertThat(extractValue(response, "hits.total.value"), equalTo(2)); + hits = extractValue(response, "hits.hits"); + assertThat(hits.get(0).get("_id"), equalTo("0")); + assertThat((double) hits.get(0).get("_score"), closeTo(0.9934857, 0.005)); + } + + public void testFlatBBQVectorSearch() throws Exception { + assumeTrue( + "Quantized vector search is not supported on this version", + getOldClusterTestVersion().onOrAfter(BBQ_VECTOR_SEARCH_VERSION) + ); + if (isOldCluster()) { + String mapping = """ + { + "properties": { + "vector": { + "type": "dense_vector", + "dims": 64, + "index": true, + "similarity": "cosine", + "index_options": { + "type": "bbq_flat" + } + } + } + } + """; + // create index and index 10 random floating point vectors + createIndex(FLAT_BBQ_INDEX_NAME, Settings.EMPTY, mapping); + index64DimVectors(FLAT_BBQ_INDEX_NAME); + // force merge the index + client().performRequest(new Request("POST", "/" + FLAT_BBQ_INDEX_NAME + "/_forcemerge?max_num_segments=1")); + } + Request searchRequest = new Request("POST", "/" + FLAT_BBQ_INDEX_NAME + "/_search"); + searchRequest.setJsonEntity(""" + { + "query": { + "script_score": { + "query": { + "exists": { + "field": "vector" + } + }, + "script": { + "source": "cosineSimilarity(params.query, 'vector') + 1.0", + "params": { + "query": [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6] + } + } + } + } + } + """); + Map response = search(searchRequest); + assertThat(extractValue(response, "hits.total.value"), equalTo(7)); + List> hits = extractValue(response, "hits.hits"); + assertThat(hits.get(0).get("_id"), equalTo("0")); + assertThat((double) hits.get(0).get("_score"), closeTo(1.9869276, 0.0001)); + + // search with knn + searchRequest = new Request("POST", "/" + FLAT_BBQ_INDEX_NAME + "/_search"); + searchRequest.setJsonEntity(""" + { + "knn": { + "field": "vector", + "query_vector": [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6], + "k": 2, + "num_candidates": 5 + } + } + """); + response = search(searchRequest); + assertThat(extractValue(response, "hits.total.value"), equalTo(2)); + hits = extractValue(response, "hits.hits"); + assertThat(hits.get(0).get("_id"), equalTo("0")); + assertThat((double) hits.get(0).get("_score"), closeTo(0.9934857, 0.005)); + } + + private void index64DimVectors(String indexName) throws Exception { + String[] vectors = new String[] { + "{\"vector\":[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, " + + "1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}", + "{\"vector\":[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, " + + "1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]}", + "{\"vector\":[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, " + + "1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]}", + "{\"vector\":[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, " + + "2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}", + "{\"vector\":[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, " + + "3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}", + "{\"vector\":[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, " + + "1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}", + "{\"vector\":[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, " + + "1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}", + "{}" }; + for (int i = 0; i < vectors.length; i++) { + Request indexRequest = new Request("PUT", "/" + indexName + "/_doc/" + i); + indexRequest.setJsonEntity(vectors[i]); + assertOK(client().performRequest(indexRequest)); + } + // always refresh to ensure the data is visible + refresh(indexName); + } + private void indexVectors(String indexName) throws Exception { String[] vectors = new String[] { "{\"vector\":[1, 1, 1]}",