Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/121105.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 121105
summary: Mark bbq indices as GA and add rolling upgrade integration tests
area: Vector Search
type: feature
issues: []
10 changes: 5 additions & 5 deletions docs/reference/mapping/types/dense-vector.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ The three following quantization strategies are supported:

* `int8` - Quantizes each dimension of the vector to 1-byte integers. This reduces the memory footprint by 75% (or 4x) at the cost of some accuracy.
* `int4` - Quantizes each dimension of the vector to half-byte integers. This reduces the memory footprint by 87% (or 8x) at the cost of accuracy.
* `bbq` - experimental:[] Better binary quantization which reduces each dimension to a single bit precision. This reduces the memory footprint by 96% (or 32x) at a larger cost of accuracy. Generally, oversampling during query time and reranking can help mitigate the accuracy loss.
* `bbq` - Better binary quantization which reduces each dimension to a single bit precision. This reduces the memory footprint by 96% (or 32x) at a larger cost of accuracy. Generally, oversampling during query time and reranking can help mitigate the accuracy loss.


When using a quantized format, you may want to oversample and rescore the results to improve accuracy. See <<dense-vector-knn-search-rescoring, oversampling and rescoring>> for more information.
Expand All @@ -133,7 +133,7 @@ This means disk usage will increase by ~25% for `int8`, ~12.5% for `int4`, and ~

NOTE: `int4` quantization requires an even number of vector dimensions.

NOTE: experimental:[] `bbq` quantization only supports vector dimensions that are greater than 64.
NOTE: `bbq` quantization only supports vector dimensions that are greater than or equal to 64.

Here is an example of how to create a byte-quantized index:

Expand Down Expand Up @@ -177,7 +177,7 @@ PUT my-byte-quantized-index
}
--------------------------------------------------

experimental:[] Here is an example of how to create a binary quantized index:
Here is an example of how to create a binary quantized index:

[source,console]
--------------------------------------------------
Expand Down Expand Up @@ -325,15 +325,15 @@ by 4x at the cost of some accuracy. See <<dense-vector-quantization, Automatical
* `int4_hnsw` - This utilizes the https://arxiv.org/abs/1603.09320[HNSW algorithm] in addition to automatic scalar
quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint
by 8x at the cost of some accuracy. See <<dense-vector-quantization, Automatically quantize vectors for kNN search>>.
* experimental:[] `bbq_hnsw` - This utilizes the https://arxiv.org/abs/1603.09320[HNSW algorithm] in addition to automatically binary
* `bbq_hnsw` - This utilizes the https://arxiv.org/abs/1603.09320[HNSW algorithm] in addition to automatic binary
quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint
by 32x at the cost of accuracy. See <<dense-vector-quantization, Automatically quantize vectors for kNN search>>.
* `flat` - This utilizes a brute-force search algorithm for exact kNN search. This supports all `element_type` values.
* `int8_flat` - This utilizes a brute-force search algorithm in addition to automatic scalar quantization. Only supports
`element_type` of `float`.
* `int4_flat` - This utilizes a brute-force search algorithm in addition to automatic half-byte scalar quantization. Only supports
`element_type` of `float`.
* experimental:[] `bbq_flat` - This utilizes a brute-force search algorithm in addition to automatically binary quantization. Only supports
* `bbq_flat` - This utilizes a brute-force search algorithm in addition to automatic binary quantization. Only supports
`element_type` of `float`.
--
`m`:::
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,14 @@ public VectorSearchIT(@Name("upgradedNodes") int upgradedNodes) {
// Names of the indices used by the individual tests in this class.
private static final String SCRIPT_BYTE_INDEX_NAME = "script_byte_vector_index";
private static final String BYTE_INDEX_NAME = "byte_vector_index";
private static final String QUANTIZED_INDEX_NAME = "quantized_vector_index";
// Index created by testBBQVectorSearch (mapping uses index_options type "bbq_hnsw").
private static final String BBQ_INDEX_NAME = "bbq_vector_index";
private static final String FLAT_QUANTIZED_INDEX_NAME = "flat_quantized_vector_index";
// Index created by testFlatBBQVectorSearch (mapping uses index_options type "bbq_flat").
private static final String FLAT_BBQ_INDEX_NAME = "flat_bbq_vector_index";
// Version gates: a test is skipped via assumeTrue unless the old cluster version is on or after the matching value.
// NOTE(review): only the BYTE_ and BBQ_ gates are visibly used in this part of the file - confirm the others follow the same pattern.
private static final String FLOAT_VECTOR_SEARCH_VERSION = "8.4.0";
private static final String BYTE_VECTOR_SEARCH_VERSION = "8.6.0";
private static final String QUANTIZED_VECTOR_SEARCH_VERSION = "8.12.1";
private static final String FLAT_QUANTIZED_VECTOR_SEARCH_VERSION = "8.13.0";
// Gate shared by testBBQVectorSearch and testFlatBBQVectorSearch.
private static final String BBQ_VECTOR_SEARCH_VERSION = "8.18.0";

public void testScriptByteVectorSearch() throws Exception {
assumeTrue("byte vector search is not supported on this version", getOldClusterTestVersion().onOrAfter(BYTE_VECTOR_SEARCH_VERSION));
Expand Down Expand Up @@ -429,6 +432,182 @@ public void testFlatQuantizedVectorSearch() throws Exception {
assertThat((double) hits.get(0).get("_score"), closeTo(0.9934857, 0.005));
}

/**
 * Checks that a {@code bbq_hnsw} dense_vector index can be searched with both a {@code script_score} query and a
 * {@code knn} search.
 * <p>
 * The index is created, populated and force merged only when {@link #isOldCluster()} is true; the two searches and
 * their assertions run on every invocation. The whole test is skipped when the old cluster version is before
 * {@code BBQ_VECTOR_SEARCH_VERSION}.
 *
 * @throws Exception if any request to the cluster fails
 */
public void testBBQVectorSearch() throws Exception {
    assumeTrue(
        "BBQ vector search is not supported on this version",
        getOldClusterTestVersion().onOrAfter(BBQ_VECTOR_SEARCH_VERSION)
    );
    if (isOldCluster()) {
        String mapping = """
            {
            "properties": {
            "vector": {
            "type": "dense_vector",
            "dims": 64,
            "index": true,
            "similarity": "cosine",
            "index_options": {
            "type": "bbq_hnsw",
            "ef_construction": 100,
            "m": 16
            }
            }
            }
            }
            """;
        // create the index and add the fixed 64-dimensional fixture documents
        // (seven documents with a vector plus one empty document, see index64DimVectors)
        createIndex(BBQ_INDEX_NAME, Settings.EMPTY, mapping);
        index64DimVectors(BBQ_INDEX_NAME);
        // force merge the index
        client().performRequest(new Request("POST", "/" + BBQ_INDEX_NAME + "/_forcemerge?max_num_segments=1"));
    }

    // search with a script_score query; the exists filter keeps only documents that have a vector
    Request searchRequest = new Request("POST", "/" + BBQ_INDEX_NAME + "/_search");
    searchRequest.setJsonEntity("""
        {
        "query": {
        "script_score": {
        "query": {
        "exists": {
        "field": "vector"
        }
        },
        "script": {
        "source": "cosineSimilarity(params.query, 'vector') + 1.0",
        "params": {
        "query": [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6]
        }
        }
        }
        }
        }
        """);
    Map<String, Object> response = search(searchRequest);
    // seven of the eight fixture documents carry a vector
    assertThat(extractValue(response, "hits.total.value"), equalTo(7));
    List<Map<String, Object>> hits = extractValue(response, "hits.hits");
    assertThat(hits.get(0).get("_id"), equalTo("0"));
    assertThat((double) hits.get(0).get("_score"), closeTo(1.9869276, 0.0001));

    // search with knn
    searchRequest = new Request("POST", "/" + BBQ_INDEX_NAME + "/_search");
    searchRequest.setJsonEntity("""
        {
        "knn": {
        "field": "vector",
        "query_vector": [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6],
        "k": 2,
        "num_candidates": 5
        }
        }
        """);
    response = search(searchRequest);
    // k is 2, so exactly two hits are expected
    assertThat(extractValue(response, "hits.total.value"), equalTo(2));
    hits = extractValue(response, "hits.hits");
    assertThat(hits.get(0).get("_id"), equalTo("0"));
    // NOTE(review): the tolerance is wider here than for script_score, presumably to allow for quantization error - confirm
    assertThat((double) hits.get(0).get("_score"), closeTo(0.9934857, 0.005));
}

/**
 * Checks that a {@code bbq_flat} dense_vector index can be searched with both a {@code script_score} query and a
 * {@code knn} search.
 * <p>
 * The index is created, populated and force merged only when {@link #isOldCluster()} is true; the two searches and
 * their assertions run on every invocation. The whole test is skipped when the old cluster version is before
 * {@code BBQ_VECTOR_SEARCH_VERSION}.
 *
 * @throws Exception if any request to the cluster fails
 */
public void testFlatBBQVectorSearch() throws Exception {
    assumeTrue(
        "BBQ vector search is not supported on this version",
        getOldClusterTestVersion().onOrAfter(BBQ_VECTOR_SEARCH_VERSION)
    );
    if (isOldCluster()) {
        String mapping = """
            {
            "properties": {
            "vector": {
            "type": "dense_vector",
            "dims": 64,
            "index": true,
            "similarity": "cosine",
            "index_options": {
            "type": "bbq_flat"
            }
            }
            }
            }
            """;
        // create the index and add the fixed 64-dimensional fixture documents
        // (seven documents with a vector plus one empty document, see index64DimVectors)
        createIndex(FLAT_BBQ_INDEX_NAME, Settings.EMPTY, mapping);
        index64DimVectors(FLAT_BBQ_INDEX_NAME);
        // force merge the index
        client().performRequest(new Request("POST", "/" + FLAT_BBQ_INDEX_NAME + "/_forcemerge?max_num_segments=1"));
    }

    // search with a script_score query; the exists filter keeps only documents that have a vector
    Request searchRequest = new Request("POST", "/" + FLAT_BBQ_INDEX_NAME + "/_search");
    searchRequest.setJsonEntity("""
        {
        "query": {
        "script_score": {
        "query": {
        "exists": {
        "field": "vector"
        }
        },
        "script": {
        "source": "cosineSimilarity(params.query, 'vector') + 1.0",
        "params": {
        "query": [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6]
        }
        }
        }
        }
        }
        """);
    Map<String, Object> response = search(searchRequest);
    // seven of the eight fixture documents carry a vector
    assertThat(extractValue(response, "hits.total.value"), equalTo(7));
    List<Map<String, Object>> hits = extractValue(response, "hits.hits");
    assertThat(hits.get(0).get("_id"), equalTo("0"));
    assertThat((double) hits.get(0).get("_score"), closeTo(1.9869276, 0.0001));

    // search with knn
    searchRequest = new Request("POST", "/" + FLAT_BBQ_INDEX_NAME + "/_search");
    searchRequest.setJsonEntity("""
        {
        "knn": {
        "field": "vector",
        "query_vector": [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6],
        "k": 2,
        "num_candidates": 5
        }
        }
        """);
    response = search(searchRequest);
    // k is 2, so exactly two hits are expected
    assertThat(extractValue(response, "hits.total.value"), equalTo(2));
    hits = extractValue(response, "hits.hits");
    assertThat(hits.get(0).get("_id"), equalTo("0"));
    // NOTE(review): the tolerance is wider here than for script_score, presumably to allow for quantization error - confirm
    assertThat((double) hits.get(0).get("_score"), closeTo(0.9934857, 0.005));
}

/**
 * Writes the fixed set of fixture documents used by the BBQ tests into the given index and refreshes it.
 * <p>
 * Eight documents are indexed with ids {@code 0} to {@code 7}, in list order: seven carry a {@code vector} field and
 * the last one is an empty object.
 *
 * @param indexName name of the index to write to
 * @throws Exception if an index or refresh request fails
 */
private void index64DimVectors(String indexName) throws Exception {
    List<String> documents = List.of(
        "{\"vector\":[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, "
            + "1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}",
        "{\"vector\":[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, "
            + "1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]}",
        "{\"vector\":[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, "
            + "1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]}",
        "{\"vector\":[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, "
            + "2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}",
        "{\"vector\":[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, "
            + "3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}",
        "{\"vector\":[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, "
            + "1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}",
        "{\"vector\":[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, "
            + "1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}",
        "{}"
    );
    // the document id is the position of the body in the list above
    int docId = 0;
    for (String body : documents) {
        Request put = new Request("PUT", "/" + indexName + "/_doc/" + docId);
        put.setJsonEntity(body);
        assertOK(client().performRequest(put));
        docId++;
    }
    // refresh so that the freshly indexed documents are visible to the searches that follow
    refresh(indexName);
}

private void indexVectors(String indexName) throws Exception {
String[] vectors = new String[] {
"{\"vector\":[1, 1, 1]}",
Expand Down