diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index ce41c2164e205..39293514b81fe 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -279,6 +279,11 @@ public Builder elementType(ElementType elementType) { return this; } + public Builder indexOptions(IndexOptions indexOptions) { + this.indexOptions.setValue(indexOptions); + return this; + } + @Override public DenseVectorFieldMapper build(MapperBuilderContext context) { // Validate again here because the dimensions or element type could have been set programmatically, @@ -1177,7 +1182,7 @@ public final String toString() { public abstract VectorSimilarityFunction vectorSimilarityFunction(IndexVersion indexVersion, ElementType elementType); } - abstract static class IndexOptions implements ToXContent { + public abstract static class IndexOptions implements ToXContent { final VectorIndexType type; IndexOptions(VectorIndexType type) { @@ -1186,7 +1191,7 @@ abstract static class IndexOptions implements ToXContent { abstract KnnVectorsFormat getVectorsFormat(ElementType elementType); - final void validateElementType(ElementType elementType) { + public final void validateElementType(ElementType elementType) { if (type.supportsElementType(elementType) == false) { throw new IllegalArgumentException( "[element_type] cannot be [" + elementType.toString() + "] when using index type [" + type + "]" @@ -2319,7 +2324,11 @@ public FieldMapper.Builder getMergeBuilder() { return new Builder(leafName(), indexCreatedVersion).init(this); } - private static IndexOptions parseIndexOptions(String fieldName, Object propNode) { + public IndexOptions indexOptions() { + return indexOptions; + } + + public static IndexOptions parseIndexOptions(String fieldName, Object propNode) { @SuppressWarnings("unchecked") Map indexOptionsMap = (Map) propNode; Object typeNode = indexOptionsMap.remove("type"); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 3bebd8086d792..2d6ae0af4dfeb 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -89,6 +89,7 @@ import java.util.function.BiConsumer; import java.util.function.Function; +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.parseIndexOptions; import static org.elasticsearch.inference.TaskType.SPARSE_EMBEDDING; import static org.elasticsearch.inference.TaskType.TEXT_EMBEDDING; import static org.elasticsearch.search.SearchService.DEFAULT_SIZE; @@ -136,6 +137,10 @@ public static BiConsumer validateParserContext(Str }; } + private static Builder builder(FieldMapper in) { + return ((SemanticTextFieldMapper) in).builder; + } + public static class Builder extends FieldMapper.Builder { private final boolean useLegacyFormat; @@ -175,6 +180,20 @@ public static class Builder extends FieldMapper.Builder { Objects::toString ).acceptsNull().setMergeValidator(SemanticTextFieldMapper::canMergeModelSettings); + private final Parameter indexOptions = new Parameter<>( + "index_options", + true, + () -> null, + (n, c, o) -> o == null ? null : parseIndexOptions(n, o), + m -> builder(m).indexOptions.get(), + (b, n, v) -> { + if (v != null) { + b.field(n, v); + } + }, + Objects::toString + ); + private final Parameter> meta = Parameter.metaParam(); private Function inferenceFieldBuilder; @@ -197,6 +216,7 @@ public Builder(String name, Function bitSetProducer, Inde indexSettings.getIndexVersionCreated(), useLegacyFormat, modelSettings.get(), + indexOptions.get(), bitSetProducer, indexSettings ); @@ -265,7 +285,8 @@ public SemanticTextFieldMapper build(MapperBuilderContext context) { useLegacyFormat, meta.getValue() ), - builderParams(this, context) + builderParams(this, context), + this ); } @@ -306,9 +327,12 @@ private SemanticTextFieldMapper copySettings(SemanticTextFieldMapper mapper, Map } } - private SemanticTextFieldMapper(String simpleName, MappedFieldType mappedFieldType, BuilderParams builderParams) { + private final Builder builder; + + private SemanticTextFieldMapper(String simpleName, MappedFieldType mappedFieldType, BuilderParams builderParams, Builder builder) { super(simpleName, mappedFieldType, builderParams); ensureMultiFields(builderParams.multiFields().iterator()); + this.builder = builder; } private void ensureMultiFields(Iterator mappers) { @@ -910,11 +934,12 @@ private static ObjectMapper createInferenceField( IndexVersion indexVersionCreated, boolean useLegacyFormat, @Nullable MinimalServiceSettings modelSettings, + @Nullable DenseVectorFieldMapper.IndexOptions indexOptions, Function bitSetProducer, IndexSettings indexSettings ) { return new ObjectMapper.Builder(INFERENCE_FIELD, Optional.of(ObjectMapper.Subobjects.ENABLED)).dynamic(ObjectMapper.Dynamic.FALSE) - .add(createChunksField(indexVersionCreated, useLegacyFormat, modelSettings, bitSetProducer, indexSettings)) + .add(createChunksField(indexVersionCreated, useLegacyFormat, modelSettings, indexOptions, bitSetProducer, indexSettings)) .build(context); } @@ -922,6 +947,7 @@ private static NestedObjectMapper.Builder createChunksField( IndexVersion indexVersionCreated, boolean useLegacyFormat, @Nullable MinimalServiceSettings modelSettings, + @Nullable DenseVectorFieldMapper.IndexOptions indexOptions, Function bitSetProducer, IndexSettings indexSettings ) { @@ -933,7 +959,7 @@ private static NestedObjectMapper.Builder createChunksField( ); chunksField.dynamic(ObjectMapper.Dynamic.FALSE); if (modelSettings != null) { - chunksField.add(createEmbeddingsField(indexSettings.getIndexVersionCreated(), modelSettings, useLegacyFormat)); + chunksField.add(createEmbeddingsField(indexSettings.getIndexVersionCreated(), modelSettings, indexOptions, useLegacyFormat)); } if (useLegacyFormat) { var chunkTextField = new KeywordFieldMapper.Builder(TEXT_FIELD, indexVersionCreated).indexed(false).docValues(false); @@ -947,6 +973,7 @@ private static NestedObjectMapper.Builder createChunksField( private static Mapper.Builder createEmbeddingsField( IndexVersion indexVersionCreated, MinimalServiceSettings modelSettings, + DenseVectorFieldMapper.IndexOptions indexOptions, boolean useLegacyFormat ) { return switch (modelSettings.taskType()) { @@ -970,6 +997,11 @@ private static Mapper.Builder createEmbeddingsField( } denseVectorMapperBuilder.dimensions(modelSettings.dimensions()); denseVectorMapperBuilder.elementType(modelSettings.elementType()); + if (indexOptions != null) { + indexOptions.validateDimension(modelSettings.dimensions()); + indexOptions.validateElementType(modelSettings.elementType()); + denseVectorMapperBuilder.indexOptions(indexOptions); + } yield denseVectorMapperBuilder; } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java index 5d1c058c89da0..96aa835c5707c 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java @@ -23,7 +23,6 @@ import org.apache.lucene.search.join.BitSetProducer; import org.apache.lucene.search.join.QueryBitSetProducer; import org.apache.lucene.search.join.ScoreMode; -import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.CheckedBiConsumer; import org.elasticsearch.common.CheckedBiFunction; @@ -73,6 +72,7 @@ import java.io.IOException; import java.util.Collection; +import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; @@ -879,17 +879,29 @@ private MapperService mapperServiceForFieldWithModelSettings( String searchInferenceId, MinimalServiceSettings modelSettings ) throws IOException { - String mappingParams = "type=semantic_text,inference_id=" + inferenceId; + return mapperServiceForFieldWithModelSettingsAndIndexOptions(fieldName, inferenceId, searchInferenceId, modelSettings, null); + } + + private MapperService mapperServiceForFieldWithModelSettingsAndIndexOptions( + String fieldName, + String inferenceId, + String searchInferenceId, + MinimalServiceSettings modelSettings, + DenseVectorFieldMapper.IndexOptions indexOptions + ) throws IOException { + XContentBuilder mappingBuilder = JsonXContent.contentBuilder().startObject(); + mappingBuilder.startObject("properties").startObject(fieldName).field("type", "semantic_text").field("inference_id", inferenceId); if (searchInferenceId != null) { - mappingParams += ",search_inference_id=" + searchInferenceId; + mappingBuilder.field("search_inference_id", searchInferenceId); } + if (indexOptions != null) { + mappingBuilder.field("index_options", indexOptions); + } + + mappingBuilder.endObject().endObject().endObject(); MapperService mapperService = createMapperService(mapping(b -> {}), useLegacyFormat); - mapperService.merge( - "_doc", - new CompressedXContent(Strings.toString(PutMappingRequest.simpleMapping(fieldName, mappingParams))), - MapperService.MergeReason.MAPPING_UPDATE - ); + mapperService.merge("_doc", new CompressedXContent(Strings.toString(mappingBuilder)), MapperService.MergeReason.MAPPING_UPDATE); SemanticTextField semanticTextField = new SemanticTextField( useLegacyFormat, @@ -951,6 +963,105 @@ public void testExistsQueryDenseVector() throws IOException { assertThat(existsQuery, instanceOf(ESToParentBlockJoinQuery.class)); } + public void testDenseVectorIndexOptions() throws IOException { + final String fieldName = "field"; + final String inferenceId = "test_service"; + + List indexOptionsList = List.of( + DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "hnsw"))), + DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "int8_hnsw"))), + DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "int4_hnsw"))), + DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "bbq_hnsw"))), + DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "flat"))), + DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "int8_flat"))), + DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "int4_flat"))), + DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "bbq_flat"))), + DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "hnsw", "m", 32, "ef_construction", 200))) + ); + + for (DenseVectorFieldMapper.IndexOptions indexOptions : indexOptionsList) { + BiConsumer assertMapperService = (m, e) -> { + Mapper mapper = m.mappingLookup().getMapper(fieldName); + assertThat(mapper, instanceOf(SemanticTextFieldMapper.class)); + SemanticTextFieldMapper semanticTextFieldMapper = (SemanticTextFieldMapper) mapper; + + FieldMapper fieldMapper = semanticTextFieldMapper.fieldType().getEmbeddingsField(); + assertThat(fieldMapper, instanceOf(DenseVectorFieldMapper.class)); + DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) fieldMapper; + + assertThat(denseVectorFieldMapper.indexOptions(), equalTo(e)); + }; + + MapperService floatMapperService = mapperServiceForFieldWithModelSettingsAndIndexOptions( + fieldName, + inferenceId, + inferenceId, + new MinimalServiceSettings( + TaskType.TEXT_EMBEDDING, + 1024, + SimilarityMeasure.COSINE, + DenseVectorFieldMapper.ElementType.FLOAT + ), + indexOptions + ); + assertMapperService.accept(floatMapperService, indexOptions); + } + } + + public void testDenseVectorIndexOptionsVaild() { + final String fieldName = "field"; + final String inferenceId = "test_service"; + + { + DenseVectorFieldMapper.IndexOptions indexOptions = DenseVectorFieldMapper.parseIndexOptions( + fieldName, + new HashMap<>(Map.of("type", "int8_hnsw")) + ); + MinimalServiceSettings invalidSettings = new MinimalServiceSettings( + TaskType.TEXT_EMBEDDING, + 1024, + SimilarityMeasure.L2_NORM, + DenseVectorFieldMapper.ElementType.BYTE + ); + + Exception e = expectThrows( + DocumentParsingException.class, + () -> mapperServiceForFieldWithModelSettingsAndIndexOptions( + fieldName, + inferenceId, + inferenceId, + invalidSettings, + indexOptions + ) + ); + assertThat(e.getCause().getMessage(), containsString("cannot be [byte] when using index type [int8_hnsw]")); + } + + { + DenseVectorFieldMapper.IndexOptions indexOptions = DenseVectorFieldMapper.parseIndexOptions( + fieldName, + new HashMap<>(Map.of("type", "bbq_hnsw")) + ); + MinimalServiceSettings invalidSettings = new MinimalServiceSettings( + TaskType.TEXT_EMBEDDING, + 10, + SimilarityMeasure.COSINE, + DenseVectorFieldMapper.ElementType.BYTE + ); + Exception e = expectThrows( + DocumentParsingException.class, + () -> mapperServiceForFieldWithModelSettingsAndIndexOptions( + fieldName, + inferenceId, + inferenceId, + invalidSettings, + indexOptions + ) + ); + assertThat(e.getCause().getMessage(), containsString("bbq_hnsw does not support dimensions fewer than 64")); + } + } + @Override protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) { // Until a doc is indexed, the query is rewritten as match no docs diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml index fcbeab9262b20..01a8a89996066 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml @@ -192,6 +192,61 @@ setup: - match: { "test-index.mappings.properties.dense_field.model_settings.task_type": text_embedding } - length: { "test-index.mappings.properties.dense_field": 3 } +--- +"Indexes dense vector document with index_options": + + - do: + indices.create: + index: test-index-options + body: + mappings: + properties: + dense_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + type: "hnsw" + m: 24 + ef_construction: 200 + + - do: + index: + index: test-index-options + id: doc_2 + body: + dense_field: + text: "these are not the droids you're looking for. He's free to go around" + inference: + inference_id: "dense-inference-id" + model_settings: + task_type: "text_embedding" + dimensions: 4 + similarity: "cosine" + element_type: "float" + index_options: + type: "int8_hnsw" + m: 24 + ef_construction: 100 + confidence_interval: 0.9 + chunks: + - text: "these are not the droids you're looking for" + embeddings: [0.04673296958208084, -0.03237321600317955, -0.02543032355606556, 0.056035321205854416] + - text: "He's free to go around" + embeddings: [0.00641461368650198, -0.0016253676731139421, -0.05126338079571724, 0.053438711911439896] + + # Checks mapping is updated when first doc arrives + - do: + indices.get_mapping: + index: test-index-options + + - match: { "test-index-options.mappings.properties.dense_field.type": "semantic_text" } + - match: { "test-index-options.mappings.properties.dense_field.inference_id": "dense-inference-id" } + - match: { "test-index-options.mappings.properties.dense_field.model_settings.task_type": "text_embedding" } + - match: { "test-index-options.mappings.properties.dense_field.index_options.type": "hnsw" } + - match: { "test-index-options.mappings.properties.dense_field.index_options.m": 24 } + - match: { "test-index-options.mappings.properties.dense_field.index_options.ef_construction": 200 } + - length: { "test-index-options.mappings.properties.dense_field": 4 } + --- "Field caps with text embedding": - requires: