diff --git a/docs/changelog/127134.yaml b/docs/changelog/127134.yaml new file mode 100644 index 0000000000000..97d6de2f99878 --- /dev/null +++ b/docs/changelog/127134.yaml @@ -0,0 +1,5 @@ +pr: 127134 +summary: Define a default oversample value for dense vectors with bbq_hnsw/bbq_flat +area: Vector Search +type: enhancement +issues: [] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml index c9c9ef8daf2c9..cdf9cbda1b34f 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml @@ -572,28 +572,13 @@ setup: - match: { hits.hits.2._score: $override_score2 } - match: { hits.hits.2._score: $default_rescore2 } +--- +"default oversample value": + - requires: + cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"] + reason: "Needs default_oversample_value_for_bbq feature" - do: - headers: - Content-Type: application/json - search: - rest_total_hits_as_int: true - index: bbq_rescore_zero_hnsw - body: - knn: - field: vector - query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393, - 0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015, - 0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259, - -0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 , - -0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232, - -0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034, - -0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582, - -0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158] - k: 3 - num_candidates: 3 + indices.get_mapping: + index: bbq_hnsw - # Compare scores as hit IDs may change depending on how things are distributed - - match: { hits.total: 3 } - - match: { hits.hits.0._score: $raw_score0 } - - match: { hits.hits.1._score: $raw_score1 } - - match: { hits.hits.2._score: $raw_score2 } + - match: { bbq_hnsw.mappings.properties.vector.index_options.rescore_vector.oversample: 3.0 } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_bbq_flat.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_bbq_flat.yml index 8374b636f1dd6..54b1e9705b019 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_bbq_flat.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_bbq_flat.yml @@ -342,3 +342,14 @@ setup: - match: { hits.hits.0._score: $rescore_score0 } - match: { hits.hits.1._score: $rescore_score1 } - match: { hits.hits.2._score: $rescore_score2 } + +--- +"default oversample value": + - requires: + cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"] + reason: "Needs default_oversample_value_for_bbq feature" + - do: + indices.get_mapping: + index: bbq_flat + + - match: { bbq_flat.mappings.properties.vector.index_options.rescore_vector.oversample: 3.0 } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int4_flat.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int4_flat.yml index 6dad9ddd26214..26749e503bff0 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int4_flat.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int4_flat.yml @@ -498,3 +498,14 @@ setup: - match: { hits.hits.0._score: $rescore_score0 } - match: { hits.hits.1._score: $rescore_score1 } - match: { hits.hits.2._score: $rescore_score2 } + +--- +"no default oversample value": + - requires: + cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"] + reason: "Needs default_oversample_value_for_bbq feature" + - do: + indices.get_mapping: + index: int4_flat + + - not_exists: int4_flat.mappings.properties.vector.index_options.rescore_vector diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int8_flat.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int8_flat.yml index 1087b5b264cf8..0fc3f3e4f91ce 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int8_flat.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int8_flat.yml @@ -436,3 +436,14 @@ setup: - match: { hits.hits.0._score: $rescore_score0 } - match: { hits.hits.1._score: $rescore_score1 } - match: { hits.hits.2._score: $rescore_score2 } + +--- +"no default oversample value": + - requires: + cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"] + reason: "Needs default_oversample_value_for_bbq feature" + - do: + indices.get_mapping: + index: int8_flat + + - not_exists: int8_flat.mappings.properties.vector.index_options.rescore_vector diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index 8f17c16a16e6e..05e8cd545afac 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -130,6 +130,7 @@ private static IndexVersion def(int id, Version luceneVersion) { public static final IndexVersion SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY = def(8_527_0_00, Version.LUCENE_9_12_1); public static final IndexVersion ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS = def(8_528_0_00, Version.LUCENE_9_12_1); public static final IndexVersion RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS = def(8_529_0_00, Version.LUCENE_9_12_1); + public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ_BACKPORT_8_X = def(8_530_0_00, Version.LUCENE_9_12_1); /* * STOP! READ THIS FIRST! No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index bb987dc284bfe..b8c7e783d90cf 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -19,6 +19,7 @@ import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING; +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ; /** * Spec for mapper-related features. @@ -95,7 +96,8 @@ public Set getTestFeatures() { DateFieldMapper.INVALID_DATE_FIX, NPE_ON_DIMS_UPDATE_FIX, RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING, - RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING + RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING, + USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ ); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 026a8d195be7e..9bcb1b8935623 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -94,8 +94,6 @@ import static org.elasticsearch.common.Strings.format; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; -import static org.elasticsearch.index.IndexVersions.DEFAULT_DENSE_VECTOR_TO_INT8_HNSW; -import static org.elasticsearch.index.IndexVersions.RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS; /** * A {@link FieldMapper} for indexing a dense vector of floats. @@ -118,21 +116,26 @@ private static boolean hasRescoreIndexVersion(IndexVersion version) { } private static boolean allowsZeroRescore(IndexVersion version) { - return version.onOrAfter(RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS); + return version.onOrAfter(IndexVersions.RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS); + } + + private static boolean defaultOversampleForBBQ(IndexVersion version) { + return version.onOrAfter(IndexVersions.DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ_BACKPORT_8_X); } public static final IndexVersion MAGNITUDE_STORED_INDEX_VERSION = IndexVersions.V_7_5_0; public static final IndexVersion INDEXED_BY_DEFAULT_INDEX_VERSION = IndexVersions.FIRST_DETACHED_INDEX_VERSION; public static final IndexVersion NORMALIZE_COSINE = IndexVersions.NORMALIZED_VECTOR_COSINE; - public static final IndexVersion DEFAULT_TO_INT8 = DEFAULT_DENSE_VECTOR_TO_INT8_HNSW; + public static final IndexVersion DEFAULT_TO_INT8 = IndexVersions.DEFAULT_DENSE_VECTOR_TO_INT8_HNSW; public static final IndexVersion LITTLE_ENDIAN_FLOAT_STORED_INDEX_VERSION = IndexVersions.V_8_9_0; - public static final IndexVersion RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS = - IndexVersions.RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS; public static final NodeFeature RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature("mapper.dense_vector.rescore_vector"); public static final NodeFeature RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature( "mapper.dense_vector.rescore_zero_vector" ); + public static final NodeFeature USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = new NodeFeature( + "mapper.dense_vector.default_oversample_value_for_bbq" + ); public static final String CONTENT_TYPE = "dense_vector"; public static short MAX_DIMS_COUNT = 4096; // maximum allowed number of dimensions @@ -141,6 +144,7 @@ private static boolean allowsZeroRescore(IndexVersion version) { public static short MIN_DIMS_FOR_DYNAMIC_FLOAT_MAPPING = 128; // minimum number of dims for floats to be dynamically mapped to vector public static final int MAGNITUDE_BYTES = 4; public static final int OVERSAMPLE_LIMIT = 10_000; // Max oversample allowed + public static final float DEFAULT_OVERSAMPLE = 3.0F; // Default oversample value private static DenseVectorFieldMapper toType(FieldMapper in) { return (DenseVectorFieldMapper) in; @@ -196,7 +200,7 @@ public Builder(String name, IndexVersion indexVersionCreated) { super(name); this.indexVersionCreated = indexVersionCreated; final boolean indexedByDefault = indexVersionCreated.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION); - final boolean defaultInt8Hnsw = indexVersionCreated.onOrAfter(DEFAULT_DENSE_VECTOR_TO_INT8_HNSW); + final boolean defaultInt8Hnsw = indexVersionCreated.onOrAfter(IndexVersions.DEFAULT_DENSE_VECTOR_TO_INT8_HNSW); this.indexed = Parameter.indexParam(m -> toType(m).fieldType().indexed, indexedByDefault); if (indexedByDefault) { // Only serialize on newer index versions to prevent breaking existing indices when upgrading @@ -1439,6 +1443,9 @@ public IndexOptions parseIndexOptions(String fieldName, Map indexOpti RescoreVector rescoreVector = null; if (hasRescoreIndexVersion(indexVersion)) { rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion); + if (rescoreVector == null && defaultOversampleForBBQ(indexVersion)) { + rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE); + } } MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); return new BBQHnswIndexOptions(m, efConstruction, rescoreVector); @@ -1460,6 +1467,9 @@ public IndexOptions parseIndexOptions(String fieldName, Map indexOpti RescoreVector rescoreVector = null; if (hasRescoreIndexVersion(indexVersion)) { rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion); + if (rescoreVector == null && defaultOversampleForBBQ(indexVersion)) { + rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE); + } } MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); return new BBQFlatIndexOptions(rescoreVector); @@ -2288,6 +2298,10 @@ int getVectorDimensions() { ElementType getElementType() { return elementType; } + + IndexOptions getIndexOptions() { + return indexOptions; + } } private final IndexOptions indexOptions; diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index 496f8b908fcbf..af8bd3d479ed5 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -1022,6 +1022,60 @@ public void testInvalidRescoreVector() { } } + public void testDefaultOversampleValue() throws IOException { + { + DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> { + b.field("type", "dense_vector"); + b.field("dims", 128); + b.field("index", true); + b.field("similarity", "dot_product"); + b.startObject("index_options"); + b.field("type", "bbq_hnsw"); + b.endObject(); + })); + + DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field"); + DenseVectorFieldMapper.BBQHnswIndexOptions indexOptions = (DenseVectorFieldMapper.BBQHnswIndexOptions) denseVectorFieldMapper + .fieldType() + .getIndexOptions(); + assertEquals(3.0F, indexOptions.rescoreVector.oversample(), 0.0F); + } + { + DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> { + b.field("type", "dense_vector"); + b.field("dims", 128); + b.field("index", true); + b.field("similarity", "dot_product"); + b.startObject("index_options"); + b.field("type", "bbq_flat"); + b.endObject(); + })); + + DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field"); + DenseVectorFieldMapper.BBQFlatIndexOptions indexOptions = (DenseVectorFieldMapper.BBQFlatIndexOptions) denseVectorFieldMapper + .fieldType() + .getIndexOptions(); + assertEquals(3.0F, indexOptions.rescoreVector.oversample(), 0.0F); + } + { + DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> { + b.field("type", "dense_vector"); + b.field("dims", 128); + b.field("index", true); + b.field("similarity", "dot_product"); + b.startObject("index_options"); + b.field("type", "int8_hnsw"); + b.endObject(); + })); + + DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field"); + DenseVectorFieldMapper.Int8HnswIndexOptions indexOptions = (DenseVectorFieldMapper.Int8HnswIndexOptions) denseVectorFieldMapper + .fieldType() + .getIndexOptions(); + assertNull(indexOptions.rescoreVector); + } + } + public void testDims() { { Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { diff --git a/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java b/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java index 1a964c8c2b4f7..9499edc71b4a6 100644 --- a/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java +++ b/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java @@ -46,6 +46,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.OVERSAMPLE_LIMIT; import static org.elasticsearch.search.SearchService.DEFAULT_SIZE; import static org.hamcrest.Matchers.containsString; @@ -144,7 +145,7 @@ protected KnnVectorQueryBuilder doCreateTestQueryBuilder() { fieldName, k, numCands, - randomRescoreVectorBuilder(), + isIndextypeBBQ() ? randomBBQRescoreVectorBuilder() : randomRescoreVectorBuilder(), randomFloat() ); @@ -161,6 +162,14 @@ protected KnnVectorQueryBuilder doCreateTestQueryBuilder() { return queryBuilder; } + private boolean isIndextypeBBQ() { + return indexType.equals("bbq_hnsw") || indexType.equals("bbq_flat"); + } + + protected RescoreVectorBuilder randomBBQRescoreVectorBuilder() { + return new RescoreVectorBuilder(randomBoolean() ? DEFAULT_OVERSAMPLE : randomFloatBetween(1.0f, 10.0f, false)); + } + protected RescoreVectorBuilder randomRescoreVectorBuilder() { if (randomBoolean()) { return null;