diff --git a/docs/changelog/119967.yaml b/docs/changelog/119967.yaml new file mode 100644 index 0000000000000..be5543be20238 --- /dev/null +++ b/docs/changelog/119967.yaml @@ -0,0 +1,5 @@ +pr: 119967 +summary: Add `index_options` to `semantic_text` field mappings +area: Mapping +type: enhancement +issues: [ ] diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index a66824d77f8d9..b10e8c793d782 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -78,7 +78,6 @@ import org.elasticsearch.search.vectors.RescoreKnnVectorQuery; import org.elasticsearch.search.vectors.VectorData; import org.elasticsearch.search.vectors.VectorSimilarityQuery; -import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.ToXContentObject; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; @@ -197,7 +196,7 @@ public static class Builder extends FieldMapper.Builder { }); private final Parameter similarity; - private final Parameter indexOptions; + private final Parameter indexOptions; private final Parameter indexed; private final Parameter> meta = Parameter.metaParam(); @@ -312,7 +311,7 @@ public Builder elementType(ElementType elementType) { return this; } - public Builder indexOptions(IndexOptions indexOptions) { + public Builder indexOptions(DenseVectorIndexOptions indexOptions) { this.indexOptions.setValue(indexOptions); return this; } @@ -1216,10 +1215,10 @@ public final String toString() { public abstract VectorSimilarityFunction vectorSimilarityFunction(IndexVersion indexVersion, ElementType elementType); } - public abstract static class IndexOptions implements ToXContent { + public abstract static class DenseVectorIndexOptions 
implements IndexOptions { final VectorIndexType type; - IndexOptions(VectorIndexType type) { + DenseVectorIndexOptions(VectorIndexType type) { this.type = type; } @@ -1243,7 +1242,7 @@ final boolean validateElementType(ElementType elementType, boolean throwOnError) return validElementType; } - abstract boolean updatableTo(IndexOptions update); + public abstract boolean updatableTo(DenseVectorIndexOptions update); public boolean validateDimension(int dim) { return validateDimension(dim, true); @@ -1257,10 +1256,14 @@ public boolean validateDimension(int dim, boolean throwOnError) { return supportsDimension; } - abstract boolean doEquals(IndexOptions other); + abstract boolean doEquals(DenseVectorIndexOptions other); abstract int doHashCode(); + public VectorIndexType getType() { + return type; + } + @Override public final boolean equals(Object other) { if (other == this) { @@ -1269,7 +1272,7 @@ public final boolean equals(Object other) { if (other == null || other.getClass() != getClass()) { return false; } - IndexOptions otherOptions = (IndexOptions) other; + DenseVectorIndexOptions otherOptions = (DenseVectorIndexOptions) other; return Objects.equals(type, otherOptions.type) && doEquals(otherOptions); } @@ -1279,7 +1282,7 @@ public final int hashCode() { } } - abstract static class QuantizedIndexOptions extends IndexOptions { + abstract static class QuantizedIndexOptions extends DenseVectorIndexOptions { final RescoreVector rescoreVector; QuantizedIndexOptions(VectorIndexType type, RescoreVector rescoreVector) { @@ -1291,7 +1294,7 @@ abstract static class QuantizedIndexOptions extends IndexOptions { public enum VectorIndexType { HNSW("hnsw", false) { @Override - public IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { + public DenseVectorIndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { Object mNode = indexOptionsMap.remove("m"); Object efConstructionNode = 
indexOptionsMap.remove("ef_construction"); if (mNode == null) { @@ -1318,7 +1321,7 @@ public boolean supportsDimension(int dims) { }, INT8_HNSW("int8_hnsw", true) { @Override - public IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { + public DenseVectorIndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { Object mNode = indexOptionsMap.remove("m"); Object efConstructionNode = indexOptionsMap.remove("ef_construction"); Object confidenceIntervalNode = indexOptionsMap.remove("confidence_interval"); @@ -1353,7 +1356,7 @@ public boolean supportsDimension(int dims) { } }, INT4_HNSW("int4_hnsw", true) { - public IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { + public DenseVectorIndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { Object mNode = indexOptionsMap.remove("m"); Object efConstructionNode = indexOptionsMap.remove("ef_construction"); Object confidenceIntervalNode = indexOptionsMap.remove("confidence_interval"); @@ -1389,7 +1392,7 @@ public boolean supportsDimension(int dims) { }, FLAT("flat", false) { @Override - public IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { + public DenseVectorIndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); return new FlatIndexOptions(); } @@ -1406,7 +1409,7 @@ public boolean supportsDimension(int dims) { }, INT8_FLAT("int8_flat", true) { @Override - public IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { + public DenseVectorIndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { Object confidenceIntervalNode = indexOptionsMap.remove("confidence_interval"); Float confidenceInterval = null; 
if (confidenceIntervalNode != null) { @@ -1432,7 +1435,7 @@ public boolean supportsDimension(int dims) { }, INT4_FLAT("int4_flat", true) { @Override - public IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { + public DenseVectorIndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { Object confidenceIntervalNode = indexOptionsMap.remove("confidence_interval"); Float confidenceInterval = null; if (confidenceIntervalNode != null) { @@ -1458,7 +1461,7 @@ public boolean supportsDimension(int dims) { }, BBQ_HNSW("bbq_hnsw", true) { @Override - public IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { + public DenseVectorIndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { Object mNode = indexOptionsMap.remove("m"); Object efConstructionNode = indexOptionsMap.remove("ef_construction"); if (mNode == null) { @@ -1492,7 +1495,7 @@ public boolean supportsDimension(int dims) { }, BBQ_FLAT("bbq_flat", true) { @Override - public IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { + public DenseVectorIndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { RescoreVector rescoreVector = null; if (hasRescoreIndexVersion(indexVersion)) { rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion); @@ -1515,7 +1518,7 @@ public boolean supportsDimension(int dims) { } }; - static Optional fromString(String type) { + public static Optional fromString(String type) { return Stream.of(VectorIndexType.values()).filter(vectorIndexType -> vectorIndexType.name.equals(type)).findFirst(); } @@ -1527,7 +1530,11 @@ static Optional fromString(String type) { this.quantized = quantized; } - abstract IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion); + public 
abstract DenseVectorIndexOptions parseIndexOptions( + String fieldName, + Map indexOptionsMap, + IndexVersion indexVersion + ); public abstract boolean supportsElementType(ElementType elementType); @@ -1537,6 +1544,10 @@ public boolean isQuantized() { return quantized; } + public String getName() { + return name; + } + @Override public String toString() { return name; @@ -1572,7 +1583,7 @@ KnnVectorsFormat getVectorsFormat(ElementType elementType) { } @Override - boolean doEquals(IndexOptions o) { + boolean doEquals(DenseVectorIndexOptions o) { Int8FlatIndexOptions that = (Int8FlatIndexOptions) o; return Objects.equals(confidenceInterval, that.confidenceInterval) && Objects.equals(rescoreVector, that.rescoreVector); } @@ -1583,7 +1594,7 @@ int doHashCode() { } @Override - boolean updatableTo(IndexOptions update) { + public boolean updatableTo(DenseVectorIndexOptions update) { return update.type.equals(this.type) || update.type.equals(VectorIndexType.HNSW) || update.type.equals(VectorIndexType.INT8_HNSW) @@ -1594,7 +1605,7 @@ boolean updatableTo(IndexOptions update) { } } - static class FlatIndexOptions extends IndexOptions { + static class FlatIndexOptions extends DenseVectorIndexOptions { FlatIndexOptions() { super(VectorIndexType.FLAT); @@ -1617,12 +1628,12 @@ KnnVectorsFormat getVectorsFormat(ElementType elementType) { } @Override - boolean updatableTo(IndexOptions update) { + public boolean updatableTo(DenseVectorIndexOptions update) { return true; } @Override - public boolean doEquals(IndexOptions o) { + public boolean doEquals(DenseVectorIndexOptions o) { return o instanceof FlatIndexOptions; } @@ -1632,12 +1643,12 @@ public int doHashCode() { } } - static class Int4HnswIndexOptions extends QuantizedIndexOptions { + public static class Int4HnswIndexOptions extends QuantizedIndexOptions { private final int m; private final int efConstruction; private final float confidenceInterval; - Int4HnswIndexOptions(int m, int efConstruction, Float confidenceInterval, 
RescoreVector rescoreVector) { + public Int4HnswIndexOptions(int m, int efConstruction, Float confidenceInterval, RescoreVector rescoreVector) { super(VectorIndexType.INT4_HNSW, rescoreVector); this.m = m; this.efConstruction = efConstruction; @@ -1667,7 +1678,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } @Override - public boolean doEquals(IndexOptions o) { + public boolean doEquals(DenseVectorIndexOptions o) { Int4HnswIndexOptions that = (Int4HnswIndexOptions) o; return m == that.m && efConstruction == that.efConstruction @@ -1696,7 +1707,7 @@ public String toString() { } @Override - boolean updatableTo(IndexOptions update) { + public boolean updatableTo(DenseVectorIndexOptions update) { boolean updatable = false; if (update.type.equals(VectorIndexType.INT4_HNSW)) { Int4HnswIndexOptions int4HnswIndexOptions = (Int4HnswIndexOptions) update; @@ -1739,7 +1750,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } @Override - public boolean doEquals(IndexOptions o) { + public boolean doEquals(DenseVectorIndexOptions o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Int4FlatIndexOptions that = (Int4FlatIndexOptions) o; @@ -1757,7 +1768,7 @@ public String toString() { } @Override - boolean updatableTo(IndexOptions update) { + public boolean updatableTo(DenseVectorIndexOptions update) { // TODO: add support for updating from flat, hnsw, and int8_hnsw and updating params return update.type.equals(this.type) || update.type.equals(VectorIndexType.HNSW) @@ -1804,7 +1815,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } @Override - public boolean doEquals(IndexOptions o) { + public boolean doEquals(DenseVectorIndexOptions o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Int8HnswIndexOptions that = (Int8HnswIndexOptions) o; @@ -1835,7 +1846,7 @@ public String toString() { } 
@Override - boolean updatableTo(IndexOptions update) { + public boolean updatableTo(DenseVectorIndexOptions update) { boolean updatable; if (update.type.equals(this.type)) { Int8HnswIndexOptions int8HnswIndexOptions = (Int8HnswIndexOptions) update; @@ -1853,7 +1864,7 @@ boolean updatableTo(IndexOptions update) { } } - static class HnswIndexOptions extends IndexOptions { + static class HnswIndexOptions extends DenseVectorIndexOptions { private final int m; private final int efConstruction; @@ -1872,7 +1883,7 @@ public KnnVectorsFormat getVectorsFormat(ElementType elementType) { } @Override - boolean updatableTo(IndexOptions update) { + public boolean updatableTo(DenseVectorIndexOptions update) { boolean updatable = update.type.equals(this.type); if (updatable) { // fewer connections would break assumptions on max number of connections (based on largest previous graph) during merge @@ -1896,7 +1907,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } @Override - public boolean doEquals(IndexOptions o) { + public boolean doEquals(DenseVectorIndexOptions o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; HnswIndexOptions that = (HnswIndexOptions) o; @@ -1931,12 +1942,12 @@ KnnVectorsFormat getVectorsFormat(ElementType elementType) { } @Override - boolean updatableTo(IndexOptions update) { + public boolean updatableTo(DenseVectorIndexOptions update) { return update.type.equals(this.type) && ((BBQHnswIndexOptions) update).m >= this.m; } @Override - boolean doEquals(IndexOptions other) { + boolean doEquals(DenseVectorIndexOptions other) { BBQHnswIndexOptions that = (BBQHnswIndexOptions) other; return m == that.m && efConstruction == that.efConstruction && Objects.equals(rescoreVector, that.rescoreVector); } @@ -1985,12 +1996,12 @@ KnnVectorsFormat getVectorsFormat(ElementType elementType) { } @Override - boolean updatableTo(IndexOptions update) { + public boolean 
updatableTo(DenseVectorIndexOptions update) { return update.type.equals(this.type) || update.type.equals(VectorIndexType.BBQ_HNSW); } @Override - boolean doEquals(IndexOptions other) { + boolean doEquals(DenseVectorIndexOptions other) { return other instanceof BBQFlatIndexOptions; } @@ -2069,7 +2080,7 @@ public static final class DenseVectorFieldType extends SimpleMappedFieldType { private final boolean indexed; private final VectorSimilarity similarity; private final IndexVersion indexVersionCreated; - private final IndexOptions indexOptions; + private final DenseVectorIndexOptions indexOptions; private final boolean isSyntheticSource; public DenseVectorFieldType( @@ -2079,7 +2090,7 @@ public DenseVectorFieldType( Integer dims, boolean indexed, VectorSimilarity similarity, - IndexOptions indexOptions, + DenseVectorIndexOptions indexOptions, Map meta, boolean isSyntheticSource ) { @@ -2399,14 +2410,14 @@ public List fetchValues(Source source, int doc, List ignoredValu } } - private final IndexOptions indexOptions; + private final DenseVectorIndexOptions indexOptions; private final IndexVersion indexCreatedVersion; private DenseVectorFieldMapper( String simpleName, MappedFieldType mappedFieldType, BuilderParams params, - IndexOptions indexOptions, + DenseVectorIndexOptions indexOptions, IndexVersion indexCreatedVersion ) { super(simpleName, mappedFieldType, params); @@ -2554,7 +2565,7 @@ public FieldMapper.Builder getMergeBuilder() { return new Builder(leafName(), indexCreatedVersion).init(this); } - private static IndexOptions parseIndexOptions(String fieldName, Object propNode, IndexVersion indexVersion) { + private static DenseVectorIndexOptions parseIndexOptions(String fieldName, Object propNode, IndexVersion indexVersion) { @SuppressWarnings("unchecked") Map indexOptionsMap = (Map) propNode; Object typeNode = indexOptionsMap.remove("type"); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/IndexOptions.java 
b/server/src/main/java/org/elasticsearch/index/mapper/vectors/IndexOptions.java new file mode 100644 index 0000000000000..4679b7d3f0982 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/IndexOptions.java @@ -0,0 +1,17 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper.vectors; + +import org.elasticsearch.xcontent.ToXContent; + +/** + * Represents general index options that can be attached to a semantic or vector field. + */ +public interface IndexOptions extends ToXContent {} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java index 3923e5ee59394..5f47c67eb0308 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java @@ -53,14 +53,14 @@ private static DenseVectorFieldMapper.RescoreVector randomRescoreVector() { return new DenseVectorFieldMapper.RescoreVector(randomBoolean() ? 
0 : randomFloatBetween(1.0F, 10.0F, false)); } - private DenseVectorFieldMapper.IndexOptions randomIndexOptionsNonQuantized() { + private DenseVectorFieldMapper.DenseVectorIndexOptions randomIndexOptionsNonQuantized() { return randomFrom( new DenseVectorFieldMapper.HnswIndexOptions(randomIntBetween(1, 100), randomIntBetween(1, 10_000)), new DenseVectorFieldMapper.FlatIndexOptions() ); } - private DenseVectorFieldMapper.IndexOptions randomIndexOptionsAll() { + public static DenseVectorFieldMapper.DenseVectorIndexOptions randomIndexOptionsAll() { return randomFrom( new DenseVectorFieldMapper.HnswIndexOptions(randomIntBetween(1, 100), randomIntBetween(1, 10_000)), new DenseVectorFieldMapper.Int8HnswIndexOptions( @@ -93,11 +93,13 @@ private DenseVectorFieldMapper.IndexOptions randomIndexOptionsAll() { ); } - private DenseVectorFieldMapper.IndexOptions randomIndexOptionsHnswQuantized() { + private DenseVectorFieldMapper.DenseVectorIndexOptions randomIndexOptionsHnswQuantized() { return randomIndexOptionsHnswQuantized(randomBoolean() ? 
null : randomRescoreVector()); } - private DenseVectorFieldMapper.IndexOptions randomIndexOptionsHnswQuantized(DenseVectorFieldMapper.RescoreVector rescoreVector) { + private DenseVectorFieldMapper.DenseVectorIndexOptions randomIndexOptionsHnswQuantized( + DenseVectorFieldMapper.RescoreVector rescoreVector + ) { return randomFrom( new DenseVectorFieldMapper.Int8HnswIndexOptions( randomIntBetween(1, 100), diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index 396b50eb7cfc6..2744aa36b5933 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -17,6 +17,7 @@ import java.util.Set; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS; +import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_INDEX_OPTIONS; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG; import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_FILTER_FIX; import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED; @@ -70,7 +71,8 @@ public Set getTestFeatures() { SemanticTextFieldMapper.SEMANTIC_TEXT_HANDLE_EMPTY_INPUT, SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG, SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER, - SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS + SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS, + SEMANTIC_TEXT_INDEX_OPTIONS ); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 37974a044e75f..0d01f641c74a6 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -27,6 +27,7 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.XContentParserUtils; +import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.Nullable; import org.elasticsearch.features.NodeFeature; @@ -137,12 +138,15 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie public static final NodeFeature SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS = new NodeFeature( "semantic_text.exclude_sub_fields_from_field_caps" ); + public static final NodeFeature SEMANTIC_TEXT_INDEX_OPTIONS = new NodeFeature("semantic_text.index_options"); public static final String CONTENT_TYPE = "semantic_text"; public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID; public static final float DEFAULT_RESCORE_OVERSAMPLE = 3.0f; + static final String INDEX_OPTIONS_FIELD = "index_options"; + public static final TypeParser parser(Supplier modelRegistry) { return new TypeParser( (n, c) -> new Builder(n, c::bitSetProducer, c.getIndexSettings(), modelRegistry.get()), @@ -199,6 +203,16 @@ public static class Builder extends FieldMapper.Builder { Objects::toString ).acceptsNull().setMergeValidator(SemanticTextFieldMapper::canMergeModelSettings); + private final Parameter indexOptions = new Parameter<>( + INDEX_OPTIONS_FIELD, + true, + () -> null, + (n, c, o) -> parseIndexOptionsFromMap(n, o, c.indexVersionCreated()), + mapper -> ((SemanticTextFieldType) 
mapper.fieldType()).indexOptions, + XContentBuilder::field, + Objects::toString + ).acceptsNull(); + @SuppressWarnings("unchecked") private final Parameter chunkingSettings = new Parameter<>( CHUNKING_SETTINGS_FIELD, @@ -242,6 +256,7 @@ public Builder( indexSettings.getIndexVersionCreated(), useLegacyFormat, resolvedModelSettings, + indexOptions.get(), bitSetProducer, indexSettings ); @@ -270,7 +285,7 @@ public Builder setChunkingSettings(ChunkingSettings value) { @Override protected Parameter[] getParameters() { - return new Parameter[] { inferenceId, searchInferenceId, modelSettings, chunkingSettings, meta }; + return new Parameter[] { inferenceId, searchInferenceId, modelSettings, chunkingSettings, indexOptions, meta }; } @Override @@ -278,12 +293,22 @@ protected void merge(FieldMapper mergeWith, Conflicts conflicts, MapperMergeCont SemanticTextFieldMapper semanticMergeWith = (SemanticTextFieldMapper) mergeWith; semanticMergeWith = copySettings(semanticMergeWith, mapperMergeContext); + // We make sure to merge the inference field first to catch any model conflicts + try { + var context = mapperMergeContext.createChildContext(semanticMergeWith.leafName(), ObjectMapper.Dynamic.FALSE); + var inferenceField = inferenceFieldBuilder.apply(context.getMapperBuilderContext()); + var mergedInferenceField = inferenceField.merge(semanticMergeWith.fieldType().getInferenceField(), context); + inferenceFieldBuilder = c -> mergedInferenceField; + } catch (Exception e) { + // Wrap errors in nicer messages that hide inference field internals + String errorMessage = e.getMessage() != null + ? 
e.getMessage().replaceAll(SemanticTextField.getEmbeddingsFieldName(""), "") + : ""; + throw new IllegalArgumentException(errorMessage, e); + } + super.merge(semanticMergeWith, conflicts, mapperMergeContext); conflicts.check(); - var context = mapperMergeContext.createChildContext(semanticMergeWith.leafName(), ObjectMapper.Dynamic.FALSE); - var inferenceField = inferenceFieldBuilder.apply(context.getMapperBuilderContext()); - var mergedInferenceField = inferenceField.merge(semanticMergeWith.fieldType().getInferenceField(), context); - inferenceFieldBuilder = c -> mergedInferenceField; } /** @@ -340,6 +365,10 @@ public SemanticTextFieldMapper build(MapperBuilderContext context) { validateServiceSettings(modelSettings.get(), resolvedModelSettings); } + if (context.getMergeReason() != MapperService.MergeReason.MAPPING_RECOVERY && indexOptions.get() != null) { + validateIndexOptions(indexOptions.get(), inferenceId.getValue(), resolvedModelSettings); + } + final String fullName = context.buildFullName(leafName()); if (context.isInNestedContext()) { @@ -356,6 +385,7 @@ public SemanticTextFieldMapper build(MapperBuilderContext context) { searchInferenceId.getValue(), modelSettings.getValue(), chunkingSettings.getValue(), + indexOptions.getValue(), inferenceField, useLegacyFormat, meta.getValue() @@ -392,6 +422,33 @@ private void validateServiceSettings(MinimalServiceSettings settings, MinimalSer } } + private void validateIndexOptions(SemanticTextIndexOptions indexOptions, String inferenceId, MinimalServiceSettings modelSettings) { + if (indexOptions == null) { + return; + } + + if (modelSettings == null) { + throw new IllegalArgumentException( + "Model settings must be set to validate index options for inference ID [" + inferenceId + "]" + ); + } + + if (indexOptions.type() == SemanticTextIndexOptions.SupportedIndexOptions.DENSE_VECTOR) { + + if (modelSettings.taskType() != TEXT_EMBEDDING) { + throw new IllegalArgumentException( + "Invalid task type for index options, 
required [" + TEXT_EMBEDDING + "] but was [" + modelSettings.taskType() + "]" + ); + } + + int dims = modelSettings.dimensions() != null ? modelSettings.dimensions() : 0; + DenseVectorFieldMapper.DenseVectorIndexOptions denseVectorIndexOptions = + (DenseVectorFieldMapper.DenseVectorIndexOptions) indexOptions.indexOptions(); + denseVectorIndexOptions.validate(modelSettings.elementType(), dims, true); + } + + } + /** * As necessary, copy settings from this builder to the passed-in mapper. * Used to preserve {@link MinimalServiceSettings} when updating a semantic text mapping to one where the model settings @@ -666,6 +723,7 @@ public static class SemanticTextFieldType extends SimpleMappedFieldType { private final String searchInferenceId; private final MinimalServiceSettings modelSettings; private final ChunkingSettings chunkingSettings; + private final SemanticTextIndexOptions indexOptions; private final ObjectMapper inferenceField; private final boolean useLegacyFormat; @@ -675,6 +733,7 @@ public SemanticTextFieldType( String searchInferenceId, MinimalServiceSettings modelSettings, ChunkingSettings chunkingSettings, + SemanticTextIndexOptions indexOptions, ObjectMapper inferenceField, boolean useLegacyFormat, Map meta @@ -684,6 +743,7 @@ public SemanticTextFieldType( this.searchInferenceId = searchInferenceId; this.modelSettings = modelSettings; this.chunkingSettings = chunkingSettings; + this.indexOptions = indexOptions; this.inferenceField = inferenceField; this.useLegacyFormat = useLegacyFormat; } @@ -723,6 +783,10 @@ public ChunkingSettings getChunkingSettings() { return chunkingSettings; } + public SemanticTextIndexOptions getIndexOptions() { + return indexOptions; + } + public ObjectMapper getInferenceField() { return inferenceField; } @@ -1037,11 +1101,12 @@ private static ObjectMapper createInferenceField( IndexVersion indexVersionCreated, boolean useLegacyFormat, @Nullable MinimalServiceSettings modelSettings, + @Nullable SemanticTextIndexOptions 
indexOptions, Function bitSetProducer, IndexSettings indexSettings ) { return new ObjectMapper.Builder(INFERENCE_FIELD, Optional.of(ObjectMapper.Subobjects.ENABLED)).dynamic(ObjectMapper.Dynamic.FALSE) - .add(createChunksField(indexVersionCreated, useLegacyFormat, modelSettings, bitSetProducer, indexSettings)) + .add(createChunksField(indexVersionCreated, useLegacyFormat, modelSettings, indexOptions, bitSetProducer, indexSettings)) .build(context); } @@ -1049,6 +1114,7 @@ private static NestedObjectMapper.Builder createChunksField( IndexVersion indexVersionCreated, boolean useLegacyFormat, @Nullable MinimalServiceSettings modelSettings, + @Nullable SemanticTextIndexOptions indexOptions, Function bitSetProducer, IndexSettings indexSettings ) { @@ -1060,7 +1126,7 @@ private static NestedObjectMapper.Builder createChunksField( ); chunksField.dynamic(ObjectMapper.Dynamic.FALSE); if (modelSettings != null) { - chunksField.add(createEmbeddingsField(indexSettings.getIndexVersionCreated(), modelSettings, useLegacyFormat)); + chunksField.add(createEmbeddingsField(indexSettings.getIndexVersionCreated(), modelSettings, indexOptions, useLegacyFormat)); } if (useLegacyFormat) { var chunkTextField = new KeywordFieldMapper.Builder(TEXT_FIELD, indexVersionCreated).indexed(false).docValues(false); @@ -1074,6 +1140,7 @@ private static NestedObjectMapper.Builder createChunksField( private static Mapper.Builder createEmbeddingsField( IndexVersion indexVersionCreated, MinimalServiceSettings modelSettings, + SemanticTextIndexOptions indexOptions, boolean useLegacyFormat ) { return switch (modelSettings.taskType()) { @@ -1097,14 +1164,28 @@ private static Mapper.Builder createEmbeddingsField( } denseVectorMapperBuilder.dimensions(modelSettings.dimensions()); denseVectorMapperBuilder.elementType(modelSettings.elementType()); - - DenseVectorFieldMapper.IndexOptions defaultIndexOptions = null; - if (indexVersionCreated.onOrAfter(SEMANTIC_TEXT_DEFAULTS_TO_BBQ_BACKPORT_8_X)) { - 
defaultIndexOptions = defaultSemanticDenseIndexOptions(); + if (indexOptions != null) { + DenseVectorFieldMapper.DenseVectorIndexOptions denseVectorIndexOptions = + (DenseVectorFieldMapper.DenseVectorIndexOptions) indexOptions.indexOptions(); + denseVectorMapperBuilder.indexOptions(denseVectorIndexOptions); + denseVectorIndexOptions.validate(modelSettings.elementType(), modelSettings.dimensions(), true); + } else { + DenseVectorFieldMapper.DenseVectorIndexOptions defaultIndexOptions = defaultDenseVectorIndexOptions( + indexVersionCreated, + modelSettings + ); + if (defaultIndexOptions != null) { + denseVectorMapperBuilder.indexOptions(defaultIndexOptions); + } } - if (defaultIndexOptions != null - && defaultIndexOptions.validate(modelSettings.elementType(), modelSettings.dimensions(), false)) { - denseVectorMapperBuilder.indexOptions(defaultIndexOptions); + + boolean hasUserSpecifiedIndexOptions = indexOptions != null; + DenseVectorFieldMapper.DenseVectorIndexOptions denseVectorIndexOptions = hasUserSpecifiedIndexOptions + ? 
(DenseVectorFieldMapper.DenseVectorIndexOptions) indexOptions.indexOptions() + : defaultDenseVectorIndexOptions(indexVersionCreated, modelSettings); + + if (denseVectorIndexOptions != null) { + denseVectorMapperBuilder.indexOptions(denseVectorIndexOptions); } yield denseVectorMapperBuilder; @@ -1113,15 +1194,46 @@ private static Mapper.Builder createEmbeddingsField( }; } - static DenseVectorFieldMapper.IndexOptions defaultSemanticDenseIndexOptions() { + static DenseVectorFieldMapper.DenseVectorIndexOptions defaultDenseVectorIndexOptions( + IndexVersion indexVersionCreated, + MinimalServiceSettings modelSettings + ) { + + if (modelSettings.dimensions() == null) { + return null; // Cannot determine default index options without dimensions + } + // As embedding models for text perform better with BBQ, we aggressively default semantic_text fields to use optimized index - // options outside of dense_vector defaults + // options + if (indexVersionDefaultsToBbqHnsw(indexVersionCreated)) { + + DenseVectorFieldMapper.DenseVectorIndexOptions defaultBbqHnswIndexOptions = defaultBbqHnswDenseVectorIndexOptions(); + return defaultBbqHnswIndexOptions.validate(modelSettings.elementType(), modelSettings.dimensions(), false) + ? 
defaultBbqHnswIndexOptions + : null; + } + + return null; + } + + static boolean indexVersionDefaultsToBbqHnsw(IndexVersion indexVersion) { + return indexVersion.onOrAfter(SEMANTIC_TEXT_DEFAULTS_TO_BBQ_BACKPORT_8_X); + } + + static DenseVectorFieldMapper.DenseVectorIndexOptions defaultBbqHnswDenseVectorIndexOptions() { int m = Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN; int efConstruction = Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH; DenseVectorFieldMapper.RescoreVector rescoreVector = new DenseVectorFieldMapper.RescoreVector(DEFAULT_RESCORE_OVERSAMPLE); return new DenseVectorFieldMapper.BBQHnswIndexOptions(m, efConstruction, rescoreVector); } + static SemanticTextIndexOptions defaultBbqHnswSemanticTextIndexOptions() { + return new SemanticTextIndexOptions( + SemanticTextIndexOptions.SupportedIndexOptions.DENSE_VECTOR, + defaultBbqHnswDenseVectorIndexOptions() + ); + } + private static boolean canMergeModelSettings(MinimalServiceSettings previous, MinimalServiceSettings current, Conflicts conflicts) { if (previous != null && current != null && previous.canMergeWith(current)) { return true; @@ -1132,4 +1244,23 @@ private static boolean canMergeModelSettings(MinimalServiceSettings previous, Mi conflicts.addConflict("model_settings", ""); return false; } + + private static SemanticTextIndexOptions parseIndexOptionsFromMap(String fieldName, Object node, IndexVersion indexVersion) { + + if (node == null) { + return null; + } + + Map map = XContentMapValues.nodeMapValue(node, INDEX_OPTIONS_FIELD); + if (map.size() != 1) { + throw new IllegalArgumentException("Too many index options provided, found [" + map.keySet() + "]"); + } + Map.Entry entry = map.entrySet().iterator().next(); + SemanticTextIndexOptions.SupportedIndexOptions indexOptions = SemanticTextIndexOptions.SupportedIndexOptions.fromValue( + entry.getKey() + ); + @SuppressWarnings("unchecked") + Map indexOptionsMap = (Map) entry.getValue(); + return new SemanticTextIndexOptions(indexOptions, 
indexOptions.parseIndexOptions(fieldName, indexOptionsMap, indexVersion)); + } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextIndexOptions.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextIndexOptions.java new file mode 100644 index 0000000000000..c062adad2f551 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextIndexOptions.java @@ -0,0 +1,110 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.mapper; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.index.mapper.vectors.IndexOptions; +import org.elasticsearch.xcontent.ToXContent; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Locale; +import java.util.Map; + +/** + * Represents index options for a semantic_text field. + * We represent semantic_text index_options as nested within their respective type. 
For example: + * "index_options": { + * "dense_vector": { + * "type": "bbq_hnsw" + * } + * } + */ +public class SemanticTextIndexOptions implements ToXContent { + + private static final String TYPE_FIELD = "type"; + + private final SupportedIndexOptions type; + private final IndexOptions indexOptions; + + public SemanticTextIndexOptions(SupportedIndexOptions type, IndexOptions indexOptions) { + this.type = type; + this.indexOptions = indexOptions; + } + + public SupportedIndexOptions type() { + return type; + } + + public IndexOptions indexOptions() { + return indexOptions; + } + + public enum SupportedIndexOptions { + DENSE_VECTOR("dense_vector") { + @Override + public IndexOptions parseIndexOptions(String fieldName, Map map, IndexVersion indexVersion) { + return parseDenseVectorIndexOptionsFromMap(fieldName, map, indexVersion); + } + }; + + public final String value; + + SupportedIndexOptions(String value) { + this.value = value; + } + + public abstract IndexOptions parseIndexOptions(String fieldName, Map map, IndexVersion indexVersion); + + public static SupportedIndexOptions fromValue(String value) { + return Arrays.stream(SupportedIndexOptions.values()) + .filter(option -> option.value.equals(value)) + .findFirst() + .orElseThrow(() -> new IllegalArgumentException("Unknown index options type [" + value + "]")); + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(type.value.toLowerCase(Locale.ROOT)); + indexOptions.toXContent(builder, params); + builder.endObject(); + return builder; + } + + @Override + public String toString() { + return Strings.toString(this); + } + + private static DenseVectorFieldMapper.DenseVectorIndexOptions parseDenseVectorIndexOptionsFromMap( + String fieldName, + Map map, + IndexVersion indexVersion + ) { + try { + Object type = map.remove(TYPE_FIELD); + if (type == null) { + throw new IllegalArgumentException("Required " + 
TYPE_FIELD); + } + DenseVectorFieldMapper.VectorIndexType vectorIndexType = DenseVectorFieldMapper.VectorIndexType.fromString( + XContentMapValues.nodeStringValue(type, null) + ).orElseThrow(() -> new IllegalArgumentException("Unsupported index options " + TYPE_FIELD + " " + type)); + + return vectorIndexType.parseIndexOptions(fieldName, map, indexVersion); + } catch (Exception exc) { + throw new ElasticsearchException(exc); + } + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticInferenceMetadataFieldsMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticInferenceMetadataFieldsMapperTests.java index 4c44940c6bf1d..c02b11163c4c5 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticInferenceMetadataFieldsMapperTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticInferenceMetadataFieldsMapperTests.java @@ -114,5 +114,4 @@ static IndexVersion getRandomCompatibleIndexVersion(boolean useLegacyFormat) { ); } } - } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java index 1416ebfc69b7f..68e079b05af54 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java @@ -53,6 +53,7 @@ import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.SourceToParse; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldTypeTests; import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper; import 
org.elasticsearch.index.mapper.vectors.XFeatureField; import org.elasticsearch.index.query.SearchExecutionContext; @@ -87,11 +88,13 @@ import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.function.BiConsumer; import java.util.function.Supplier; +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldTypeTests.randomIndexOptionsAll; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_EMBEDDINGS_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKS_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.INFERENCE_FIELD; @@ -102,6 +105,8 @@ import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.getChunksFieldName; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.getEmbeddingsFieldName; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_ELSER_2_INFERENCE_ID; +import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_RESCORE_OVERSAMPLE; +import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.INDEX_OPTIONS_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.generateRandomChunkingSettings; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.generateRandomChunkingSettingsOtherThan; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.randomSemanticText; @@ -182,6 +187,17 @@ private static void validateIndexVersion(IndexVersion indexVersion, boolean useL } } + private MapperService createMapperService(String mappings, boolean useLegacyFormat) throws IOException { + var settings = Settings.builder() + .put( + IndexMetadata.SETTING_INDEX_VERSION_CREATED.getKey(), + 
SemanticInferenceMetadataFieldsMapperTests.getRandomCompatibleIndexVersion(useLegacyFormat) + ) + .put(InferenceMetadataFieldsMapper.USE_LEGACY_SEMANTIC_TEXT_FORMAT.getKey(), useLegacyFormat) + .build(); + return createMapperService(settings, mappings); + } + @Override protected Settings getIndexSettings() { return Settings.builder() @@ -237,7 +253,17 @@ protected IngestScriptSupport ingestScriptSupport() { @Override public MappedFieldType getMappedFieldType() { - return new SemanticTextFieldMapper.SemanticTextFieldType("field", "fake-inference-id", null, null, null, null, false, Map.of()); + return new SemanticTextFieldMapper.SemanticTextFieldType( + "field", + "fake-inference-id", + null, + null, + null, + null, + null, + false, + Map.of() + ); } @Override @@ -255,7 +281,7 @@ public void testDefaults() throws Exception { MapperService mapperService = createMapperService(fieldMapping, useLegacyFormat); DocumentMapper mapper = mapperService.documentMapper(); assertEquals(Strings.toString(expectedMapping), mapper.mappingSource().toString()); - assertSemanticTextField(mapperService, fieldName, false); + assertSemanticTextField(mapperService, fieldName, false, null, null); assertInferenceEndpoints(mapperService, fieldName, DEFAULT_ELSER_2_INFERENCE_ID, DEFAULT_ELSER_2_INFERENCE_ID); ParsedDocument doc1 = mapper.parse(source(this::writeField)); @@ -285,7 +311,7 @@ public void testSetInferenceEndpoints() throws IOException { { final XContentBuilder fieldMapping = fieldMapping(b -> b.field("type", "semantic_text").field(INFERENCE_ID_FIELD, inferenceId)); final MapperService mapperService = createMapperService(fieldMapping, useLegacyFormat); - assertSemanticTextField(mapperService, fieldName, false); + assertSemanticTextField(mapperService, fieldName, false, null, null); assertInferenceEndpoints(mapperService, fieldName, inferenceId, inferenceId); assertSerialization.accept(fieldMapping, mapperService); } @@ -299,7 +325,7 @@ public void testSetInferenceEndpoints() throws 
IOException { .field(SEARCH_INFERENCE_ID_FIELD, searchInferenceId) ); final MapperService mapperService = createMapperService(fieldMapping, useLegacyFormat); - assertSemanticTextField(mapperService, fieldName, false); + assertSemanticTextField(mapperService, fieldName, false, null, null); assertInferenceEndpoints(mapperService, fieldName, DEFAULT_ELSER_2_INFERENCE_ID, searchInferenceId); assertSerialization.accept(expectedMapping, mapperService); } @@ -310,7 +336,7 @@ public void testSetInferenceEndpoints() throws IOException { .field(SEARCH_INFERENCE_ID_FIELD, searchInferenceId) ); MapperService mapperService = createMapperService(fieldMapping, useLegacyFormat); - assertSemanticTextField(mapperService, fieldName, false); + assertSemanticTextField(mapperService, fieldName, false, null, null); assertInferenceEndpoints(mapperService, fieldName, inferenceId, searchInferenceId); assertSerialization.accept(fieldMapping, mapperService); } @@ -370,7 +396,7 @@ public void testInvalidTaskTypes() { useLegacyFormat ) ); - assertThat(e.getMessage(), containsString("Failed to parse mapping: Wrong [task_type]")); + assertThat(e.getMessage(), containsString("Wrong [task_type], expected text_embedding or sparse_embedding")); } } @@ -399,7 +425,7 @@ public void testMultiFieldsSupport() throws IOException { b.endObject(); b.endObject(); }), useLegacyFormat); - assertSemanticTextField(mapperService, "field.semantic", true); + assertSemanticTextField(mapperService, "field.semantic", true, null, null); mapperService = createMapperService(fieldMapping(b -> { b.field("type", "semantic_text"); @@ -413,7 +439,7 @@ public void testMultiFieldsSupport() throws IOException { b.endObject(); b.endObject(); }), useLegacyFormat); - assertSemanticTextField(mapperService, "field", true); + assertSemanticTextField(mapperService, "field", true, null, null); mapperService = createMapperService(fieldMapping(b -> { b.field("type", "semantic_text"); @@ -431,8 +457,8 @@ public void testMultiFieldsSupport() 
throws IOException { b.endObject(); b.endObject(); }), useLegacyFormat); - assertSemanticTextField(mapperService, "field", true); - assertSemanticTextField(mapperService, "field.semantic", true); + assertSemanticTextField(mapperService, "field", true, null, null); + assertSemanticTextField(mapperService, "field.semantic", true, null, null); Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { b.field("type", "semantic_text"); @@ -454,7 +480,7 @@ public void testUpdatesToInferenceIdNotSupported() throws IOException { mapping(b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", "test_model").endObject()), useLegacyFormat ); - assertSemanticTextField(mapperService, fieldName, false); + assertSemanticTextField(mapperService, fieldName, false, null, null); Exception e = expectThrows( IllegalArgumentException.class, () -> merge( @@ -476,7 +502,7 @@ public void testDynamicUpdate() throws IOException { inferenceId, new MinimalServiceSettings("service", TaskType.SPARSE_EMBEDDING, null, null, null) ); - assertSemanticTextField(mapperService, fieldName, true); + assertSemanticTextField(mapperService, fieldName, true, null, null); assertInferenceEndpoints(mapperService, fieldName, inferenceId, inferenceId); } @@ -487,7 +513,7 @@ public void testDynamicUpdate() throws IOException { searchInferenceId, new MinimalServiceSettings("service", TaskType.SPARSE_EMBEDDING, null, null, null) ); - assertSemanticTextField(mapperService, fieldName, true); + assertSemanticTextField(mapperService, fieldName, true, null, null); assertInferenceEndpoints(mapperService, fieldName, inferenceId, searchInferenceId); } } @@ -499,7 +525,7 @@ public void testUpdateModelSettings() throws IOException { mapping(b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", "test_model").endObject()), useLegacyFormat ); - assertSemanticTextField(mapperService, fieldName, false); + 
assertSemanticTextField(mapperService, fieldName, false, null, null); { Exception exc = expectThrows( MapperParsingException.class, @@ -531,14 +557,14 @@ public void testUpdateModelSettings() throws IOException { .endObject() ) ); - assertSemanticTextField(mapperService, fieldName, true); + assertSemanticTextField(mapperService, fieldName, true, null, null); } { merge( mapperService, mapping(b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", "test_model").endObject()) ); - assertSemanticTextField(mapperService, fieldName, true); + assertSemanticTextField(mapperService, fieldName, true, null, null); } { Exception exc = expectThrows( @@ -559,18 +585,33 @@ public void testUpdateModelSettings() throws IOException { ) ) ); - assertThat( - exc.getMessage(), - containsString( - "Cannot update parameter [model_settings] " - + "from [service=null, task_type=sparse_embedding] " - + "to [service=null, task_type=text_embedding, dimensions=10, similarity=cosine, element_type=float]" - ) - ); + assertThat(exc.getMessage(), containsString("cannot be changed from type [sparse_vector] to [dense_vector]")); } } } + public void testDenseVectorIndexOptionValidation() throws IOException { + for (int depth = 1; depth < 5; depth++) { + String inferenceId = "test_model"; + String fieldName = randomFieldName(depth); + + DenseVectorFieldMapper.DenseVectorIndexOptions indexOptions = DenseVectorFieldTypeTests.randomIndexOptionsAll(); + Exception exc = expectThrows(MapperParsingException.class, () -> createMapperService(mapping(b -> { + b.startObject(fieldName); + b.field("type", SemanticTextFieldMapper.CONTENT_TYPE); + b.field(INFERENCE_ID_FIELD, inferenceId); + b.startObject(INDEX_OPTIONS_FIELD); + b.startObject("dense_vector"); + b.field("type", indexOptions.getType().name().toLowerCase(Locale.ROOT)); + b.field("unsupported_param", "any_value"); + b.endObject(); + b.endObject(); + b.endObject(); + }), useLegacyFormat)); + 
assertTrue(exc.getMessage().contains("unsupported parameters")); + } + } + public void testUpdateSearchInferenceId() throws IOException { final String inferenceId = "test_inference_id"; final String searchInferenceId1 = "test_search_inference_id_1"; @@ -587,19 +628,19 @@ public void testUpdateSearchInferenceId() throws IOException { for (int depth = 1; depth < 5; depth++) { String fieldName = randomFieldName(depth); MapperService mapperService = createMapperService(buildMapping.apply(fieldName, null), useLegacyFormat); - assertSemanticTextField(mapperService, fieldName, false); + assertSemanticTextField(mapperService, fieldName, false, null, null); assertInferenceEndpoints(mapperService, fieldName, inferenceId, inferenceId); merge(mapperService, buildMapping.apply(fieldName, searchInferenceId1)); - assertSemanticTextField(mapperService, fieldName, false); + assertSemanticTextField(mapperService, fieldName, false, null, null); assertInferenceEndpoints(mapperService, fieldName, inferenceId, searchInferenceId1); merge(mapperService, buildMapping.apply(fieldName, searchInferenceId2)); - assertSemanticTextField(mapperService, fieldName, false); + assertSemanticTextField(mapperService, fieldName, false, null, null); assertInferenceEndpoints(mapperService, fieldName, inferenceId, searchInferenceId2); merge(mapperService, buildMapping.apply(fieldName, null)); - assertSemanticTextField(mapperService, fieldName, false); + assertSemanticTextField(mapperService, fieldName, false, null, null); assertInferenceEndpoints(mapperService, fieldName, inferenceId, inferenceId); mapperService = mapperServiceForFieldWithModelSettings( @@ -607,19 +648,19 @@ public void testUpdateSearchInferenceId() throws IOException { inferenceId, new MinimalServiceSettings("my-service", TaskType.SPARSE_EMBEDDING, null, null, null) ); - assertSemanticTextField(mapperService, fieldName, true); + assertSemanticTextField(mapperService, fieldName, true, null, null); assertInferenceEndpoints(mapperService, 
fieldName, inferenceId, inferenceId); merge(mapperService, buildMapping.apply(fieldName, searchInferenceId1)); - assertSemanticTextField(mapperService, fieldName, true); + assertSemanticTextField(mapperService, fieldName, true, null, null); assertInferenceEndpoints(mapperService, fieldName, inferenceId, searchInferenceId1); merge(mapperService, buildMapping.apply(fieldName, searchInferenceId2)); - assertSemanticTextField(mapperService, fieldName, true); + assertSemanticTextField(mapperService, fieldName, true, null, null); assertInferenceEndpoints(mapperService, fieldName, inferenceId, searchInferenceId2); merge(mapperService, buildMapping.apply(fieldName, null)); - assertSemanticTextField(mapperService, fieldName, true); + assertSemanticTextField(mapperService, fieldName, true, null, null); assertInferenceEndpoints(mapperService, fieldName, inferenceId, inferenceId); } } @@ -633,7 +674,7 @@ private static void assertSemanticTextField( String fieldName, boolean expectedModelSettings, ChunkingSettings expectedChunkingSettings, - DenseVectorFieldMapper.IndexOptions expectedIndexOptions + SemanticTextIndexOptions expectedIndexOptions ) { Mapper mapper = mapperService.mappingLookup().getMapper(fieldName); assertNotNull(mapper); @@ -644,7 +685,7 @@ private static void assertSemanticTextField( assertNotNull(fieldType); assertThat(fieldType, instanceOf(SemanticTextFieldMapper.SemanticTextFieldType.class)); SemanticTextFieldMapper.SemanticTextFieldType semanticTextFieldType = (SemanticTextFieldMapper.SemanticTextFieldType) fieldType; - assertTrue(semanticFieldMapper.fieldType() == semanticTextFieldType); + assertSame(semanticFieldMapper.fieldType(), semanticTextFieldType); NestedObjectMapper chunksMapper = mapperService.mappingLookup() .nestedLookup() @@ -672,7 +713,7 @@ private static void assertSemanticTextField( assertNotNull(embeddingsMapper); assertThat(embeddingsMapper, instanceOf(FieldMapper.class)); FieldMapper embeddingsFieldMapper = (FieldMapper) 
embeddingsMapper; - assertTrue(embeddingsFieldMapper.fieldType() == mapperService.mappingLookup().getFieldType(getEmbeddingsFieldName(fieldName))); + assertSame(embeddingsFieldMapper.fieldType(), mapperService.mappingLookup().getFieldType(getEmbeddingsFieldName(fieldName))); assertThat(embeddingsMapper.fullPath(), equalTo(getEmbeddingsFieldName(fieldName))); switch (semanticFieldMapper.fieldType().getModelSettings().taskType()) { case SPARSE_EMBEDDING -> { @@ -685,7 +726,7 @@ private static void assertSemanticTextField( assertThat(embeddingsMapper, instanceOf(DenseVectorFieldMapper.class)); DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) embeddingsMapper; if (expectedIndexOptions != null) { - assertEquals(expectedIndexOptions, denseVectorFieldMapper.fieldType().getIndexOptions()); + assertEquals(expectedIndexOptions.indexOptions(), denseVectorFieldMapper.fieldType().getIndexOptions()); } else { assertNull(denseVectorFieldMapper.fieldType().getIndexOptions()); } @@ -725,35 +766,39 @@ public void testSuccessfulParse() throws IOException { final String searchInferenceId = randomAlphaOfLength(8); final boolean setSearchInferenceId = randomBoolean(); - Model model1 = TestModel.createRandomInstance(TaskType.SPARSE_EMBEDDING); - Model model2 = TestModel.createRandomInstance(TaskType.SPARSE_EMBEDDING); + TaskType taskType = TaskType.SPARSE_EMBEDDING; + Model model1 = TestModel.createRandomInstance(taskType); + Model model2 = TestModel.createRandomInstance(taskType); ChunkingSettings chunkingSettings = null; // Some chunking settings configs can produce different Lucene docs counts + SemanticTextIndexOptions indexOptions = randomSemanticTextIndexOptions(taskType); XContentBuilder mapping = mapping(b -> { addSemanticTextMapping( b, fieldName1, model1.getInferenceEntityId(), setSearchInferenceId ? 
searchInferenceId : null, - chunkingSettings + chunkingSettings, + indexOptions ); addSemanticTextMapping( b, fieldName2, model2.getInferenceEntityId(), setSearchInferenceId ? searchInferenceId : null, - chunkingSettings + chunkingSettings, + indexOptions ); }); MapperService mapperService = createMapperService(mapping, useLegacyFormat); - assertSemanticTextField(mapperService, fieldName1, false); + assertSemanticTextField(mapperService, fieldName1, false, null, null); assertInferenceEndpoints( mapperService, fieldName1, model1.getInferenceEntityId(), setSearchInferenceId ? searchInferenceId : model1.getInferenceEntityId() ); - assertSemanticTextField(mapperService, fieldName2, false); + assertSemanticTextField(mapperService, fieldName2, false, null, null); assertInferenceEndpoints( mapperService, fieldName2, @@ -857,7 +902,7 @@ public void testSuccessfulParse() throws IOException { public void testMissingInferenceId() throws IOException { final MapperService mapperService = createMapperService( - mapping(b -> addSemanticTextMapping(b, "field", "my_id", null, null)), + mapping(b -> addSemanticTextMapping(b, "field", "my_id", null, null, null)), useLegacyFormat ); @@ -885,7 +930,7 @@ public void testMissingInferenceId() throws IOException { public void testMissingModelSettingsAndChunks() throws IOException { MapperService mapperService = createMapperService( - mapping(b -> addSemanticTextMapping(b, "field", "my_id", null, null)), + mapping(b -> addSemanticTextMapping(b, "field", "my_id", null, null, null)), useLegacyFormat ); IllegalArgumentException ex = expectThrows( @@ -905,7 +950,7 @@ public void testMissingModelSettingsAndChunks() throws IOException { public void testMissingTaskType() throws IOException { MapperService mapperService = createMapperService( - mapping(b -> addSemanticTextMapping(b, "field", "my_id", null, null)), + mapping(b -> addSemanticTextMapping(b, "field", "my_id", null, null, null)), useLegacyFormat ); IllegalArgumentException ex = 
expectThrows( @@ -969,6 +1014,7 @@ public void testDenseVectorElementType() throws IOException { public void testSettingAndUpdatingChunkingSettings() throws IOException { Model model = TestModel.createRandomInstance(TaskType.SPARSE_EMBEDDING); final ChunkingSettings chunkingSettings = generateRandomChunkingSettings(false); + final SemanticTextIndexOptions indexOptions = null; String fieldName = "field"; SemanticTextField randomSemanticText = randomSemanticText( @@ -981,20 +1027,25 @@ public void testSettingAndUpdatingChunkingSettings() throws IOException { ); MapperService mapperService = createMapperService( - mapping(b -> addSemanticTextMapping(b, fieldName, model.getInferenceEntityId(), null, chunkingSettings)), + mapping(b -> addSemanticTextMapping(b, fieldName, model.getInferenceEntityId(), null, chunkingSettings, indexOptions)), useLegacyFormat ); assertSemanticTextField(mapperService, fieldName, false, chunkingSettings, null); ChunkingSettings newChunkingSettings = generateRandomChunkingSettingsOtherThan(chunkingSettings); - merge(mapperService, mapping(b -> addSemanticTextMapping(b, fieldName, model.getInferenceEntityId(), null, newChunkingSettings))); - assertSemanticTextField(mapperService, fieldName, false, newChunkingSettings, null); + merge( + mapperService, + mapping(b -> addSemanticTextMapping(b, fieldName, model.getInferenceEntityId(), null, newChunkingSettings, indexOptions)) + ); + assertSemanticTextField(mapperService, fieldName, false, newChunkingSettings, indexOptions); } public void testModelSettingsRequiredWithChunks() throws IOException { // Create inference results where model settings are set to null and chunks are provided - Model model = TestModel.createRandomInstance(TaskType.SPARSE_EMBEDDING); + TaskType taskType = TaskType.SPARSE_EMBEDDING; + Model model = TestModel.createRandomInstance(taskType); ChunkingSettings chunkingSettings = generateRandomChunkingSettings(false); + SemanticTextIndexOptions indexOptions = 
randomSemanticTextIndexOptions(taskType); SemanticTextField randomSemanticText = randomSemanticText( useLegacyFormat, "field", @@ -1017,7 +1068,7 @@ public void testModelSettingsRequiredWithChunks() throws IOException { ); MapperService mapperService = createMapperService( - mapping(b -> addSemanticTextMapping(b, "field", model.getInferenceEntityId(), null, chunkingSettings)), + mapping(b -> addSemanticTextMapping(b, "field", model.getInferenceEntityId(), null, chunkingSettings, indexOptions)), useLegacyFormat ); SourceToParse source = source(b -> addSemanticTextInferenceResults(useLegacyFormat, b, List.of(inferenceResults))); @@ -1118,13 +1169,31 @@ public void testExistsQueryDenseVector() throws IOException { assertThat(existsQuery, instanceOf(ESToParentBlockJoinQuery.class)); } - private static DenseVectorFieldMapper.IndexOptions defaultDenseVectorIndexOptions() { + private static DenseVectorFieldMapper.DenseVectorIndexOptions defaultDenseVectorIndexOptions() { // These are the default index options for dense_vector fields, and used for semantic_text fields incompatible with BBQ. 
int m = Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN; int efConstruction = Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH; return new DenseVectorFieldMapper.Int8HnswIndexOptions(m, efConstruction, null, null); } + private static SemanticTextIndexOptions defaultDenseVectorSemanticIndexOptions() { + return new SemanticTextIndexOptions(SemanticTextIndexOptions.SupportedIndexOptions.DENSE_VECTOR, defaultDenseVectorIndexOptions()); + } + + private static DenseVectorFieldMapper.DenseVectorIndexOptions defaultBbqHnswDenseVectorIndexOptions() { + int m = Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN; + int efConstruction = Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH; + DenseVectorFieldMapper.RescoreVector rescoreVector = new DenseVectorFieldMapper.RescoreVector(DEFAULT_RESCORE_OVERSAMPLE); + return new DenseVectorFieldMapper.BBQHnswIndexOptions(m, efConstruction, rescoreVector); + } + + private static SemanticTextIndexOptions defaultBbqHnswSemanticTextIndexOptions() { + return new SemanticTextIndexOptions( + SemanticTextIndexOptions.SupportedIndexOptions.DENSE_VECTOR, + defaultBbqHnswDenseVectorIndexOptions() + ); + } + public void testDefaultIndexOptions() throws IOException { // We default to BBQ for eligible dense vectors @@ -1138,7 +1207,7 @@ public void testDefaultIndexOptions() throws IOException { b.field("element_type", "float"); b.endObject(); }), useLegacyFormat, IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ_BACKPORT_8_X); - assertSemanticTextField(mapperService, "field", true, null, SemanticTextFieldMapper.defaultSemanticDenseIndexOptions()); + assertSemanticTextField(mapperService, "field", true, null, SemanticTextFieldMapper.defaultBbqHnswSemanticTextIndexOptions()); // Element types that are incompatible with BBQ will continue to use dense_vector defaults mapperService = createMapperService(fieldMapping(b -> { @@ -1164,7 +1233,7 @@ public void testDefaultIndexOptions() throws IOException { b.field("element_type", "float"); b.endObject(); }), useLegacyFormat, 
IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ_BACKPORT_8_X); - assertSemanticTextField(mapperService, "field", true, null, defaultDenseVectorIndexOptions()); + assertSemanticTextField(mapperService, "field", true, null, defaultDenseVectorSemanticIndexOptions()); // Previous index versions do not set BBQ index options mapperService = createMapperService(fieldMapping(b -> { @@ -1181,8 +1250,134 @@ public void testDefaultIndexOptions() throws IOException { IndexVersions.INFERENCE_METADATA_FIELDS_BACKPORT, IndexVersionUtils.getPreviousVersion(IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ_BACKPORT_8_X) ); - assertSemanticTextField(mapperService, "field", true, null, defaultDenseVectorIndexOptions()); + assertSemanticTextField(mapperService, "field", true, null, defaultDenseVectorSemanticIndexOptions()); + } + + public void testSpecifiedDenseVectorIndexOptions() throws IOException { + // Specifying index options will override default index option settings + var mapperService = createMapperService(fieldMapping(b -> { + b.field("type", "semantic_text"); + b.field("inference_id", "another_inference_id"); + b.startObject("model_settings"); + b.field("task_type", "text_embedding"); + b.field("dimensions", 100); + b.field("similarity", "cosine"); + b.field("element_type", "float"); + b.endObject(); + b.startObject("index_options"); + b.startObject("dense_vector"); + b.field("type", "int4_hnsw"); + b.field("m", 20); + b.field("ef_construction", 90); + b.field("confidence_interval", 0.4); + b.endObject(); + b.endObject(); + }), useLegacyFormat, IndexVersions.INFERENCE_METADATA_FIELDS_BACKPORT); + assertSemanticTextField( + mapperService, + "field", + true, + null, + new SemanticTextIndexOptions( + SemanticTextIndexOptions.SupportedIndexOptions.DENSE_VECTOR, + new DenseVectorFieldMapper.Int4HnswIndexOptions(20, 90, 0.4f, null) + ) + ); + + // Specifying partial index options will fill in the remaining index options with defaults + mapperService = createMapperService(fieldMapping(b -> { +
b.field("type", "semantic_text"); + b.field("inference_id", "another_inference_id"); + b.startObject("model_settings"); + b.field("task_type", "text_embedding"); + b.field("dimensions", 100); + b.field("similarity", "cosine"); + b.field("element_type", "float"); + b.endObject(); + b.startObject("index_options"); + b.startObject("dense_vector"); + b.field("type", "int4_hnsw"); + b.endObject(); + b.endObject(); + }), useLegacyFormat, IndexVersions.INFERENCE_METADATA_FIELDS_BACKPORT); + assertSemanticTextField( + mapperService, + "field", + true, + null, + new SemanticTextIndexOptions( + SemanticTextIndexOptions.SupportedIndexOptions.DENSE_VECTOR, + new DenseVectorFieldMapper.Int4HnswIndexOptions(16, 100, 0f, null) + ) + ); + + // Incompatible index options will fail + Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { + b.field("type", "semantic_text"); + b.field("inference_id", "another_inference_id"); + b.startObject("model_settings"); + b.field("task_type", "sparse_embedding"); + b.endObject(); + b.startObject("index_options"); + b.startObject("dense_vector"); + b.field("type", "int8_hnsw"); + b.endObject(); + b.endObject(); + }), useLegacyFormat, IndexVersions.INFERENCE_METADATA_FIELDS_BACKPORT)); + assertThat(e.getMessage(), containsString("Invalid task type")); + + e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { + b.field("type", "semantic_text"); + b.field("inference_id", "another_inference_id"); + b.startObject("model_settings"); + b.field("task_type", "text_embedding"); + b.field("dimensions", 100); + b.field("similarity", "cosine"); + b.field("element_type", "float"); + b.endObject(); + b.startObject("index_options"); + b.startObject("dense_vector"); + b.field("type", "bbq_flat"); + b.field("ef_construction", 100); + b.endObject(); + b.endObject(); + }), useLegacyFormat, IndexVersions.INFERENCE_METADATA_FIELDS_BACKPORT)); + assertThat(e.getMessage(), 
containsString("unsupported parameters: [ef_construction : 100]")); + + e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { + b.field("type", "semantic_text"); + b.field("inference_id", "another_inference_id"); + b.startObject("model_settings"); + b.field("task_type", "text_embedding"); + b.field("dimensions", 100); + b.field("similarity", "cosine"); + b.field("element_type", "float"); + b.endObject(); + b.startObject("index_options"); + b.startObject("dense_vector"); + b.field("type", "invalid"); + b.endObject(); + b.endObject(); + }), useLegacyFormat, IndexVersions.INFERENCE_METADATA_FIELDS_BACKPORT)); + assertThat(e.getMessage(), containsString("Unsupported index options type invalid")); + + } + + public static SemanticTextIndexOptions randomSemanticTextIndexOptions() { + TaskType taskType = randomFrom(TaskType.SPARSE_EMBEDDING, TaskType.TEXT_EMBEDDING); + return randomSemanticTextIndexOptions(taskType); + } + + public static SemanticTextIndexOptions randomSemanticTextIndexOptions(TaskType taskType) { + + if (taskType == TaskType.TEXT_EMBEDDING) { + return randomBoolean() + ? 
null + : new SemanticTextIndexOptions(SemanticTextIndexOptions.SupportedIndexOptions.DENSE_VECTOR, randomIndexOptionsAll()); + } + + return null; } @Override @@ -1196,7 +1391,8 @@ private static void addSemanticTextMapping( String fieldName, String inferenceId, String searchInferenceId, - ChunkingSettings chunkingSettings + ChunkingSettings chunkingSettings, + SemanticTextIndexOptions indexOptions ) throws IOException { mappingBuilder.startObject(fieldName); mappingBuilder.field("type", SemanticTextFieldMapper.CONTENT_TYPE); @@ -1209,6 +1405,10 @@ private static void addSemanticTextMapping( mappingBuilder.mapContents(chunkingSettings.asMap()); mappingBuilder.endObject(); } + if (indexOptions != null) { + mappingBuilder.field(INDEX_OPTIONS_FIELD); + indexOptions.toXContent(mappingBuilder, null); + } mappingBuilder.endObject(); } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml index a1c2663b22cc9..5cc0d83685169 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml @@ -3,6 +3,56 @@ setup: cluster_features: "gte_v8.15.0" reason: semantic_text introduced in 8.15.0 + - do: + inference.put: + task_type: sparse_embedding + inference_id: sparse-inference-id + body: > + { + "service": "test_service", + "service_settings": { + "model": "my_model", + "api_key": "abc64" + }, + "task_settings": { + } + } + + - do: + inference.put: + task_type: text_embedding + inference_id: dense-inference-id + body: > + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "dimensions": 4, + "similarity": "cosine", + "api_key": "abc64" + }, + "task_settings": { + } + } 
+ + - do: + inference.put: + task_type: text_embedding + inference_id: dense-inference-id-compatible-with-bbq + body: > + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "dimensions": 64, + "similarity": "cosine", + "api_key": "abc64" + }, + "task_settings": { + } + } + + - do: indices.create: index: test-index @@ -157,7 +207,7 @@ setup: - match: { "test-index.mappings.properties.dense_field.type": semantic_text } - match: { "test-index.mappings.properties.dense_field.inference_id": dense-inference-id } - - length: { "test-index.mappings.properties.dense_field": 2 } + - not_exists: test-index.mappings.properties.dense_field.model_settings - do: index: @@ -177,10 +227,10 @@ setup: dense_field: - start_offset: 0 end_offset: 44 - embeddings: [0.04673296958208084, -0.03237321600317955, -0.02543032355606556, 0.056035321205854416] + embeddings: [ 0.04673296958208084, -0.03237321600317955, -0.02543032355606556, 0.056035321205854416 ] - start_offset: 44 end_offset: 67 - embeddings: [0.00641461368650198, -0.0016253676731139421, -0.05126338079571724, 0.053438711911439896] + embeddings: [ 0.00641461368650198, -0.0016253676731139421, -0.05126338079571724, 0.053438711911439896 ] # Checks mapping is updated when first doc arrives - do: @@ -190,7 +240,72 @@ setup: - match: { "test-index.mappings.properties.dense_field.type": semantic_text } - match: { "test-index.mappings.properties.dense_field.inference_id": dense-inference-id } - match: { "test-index.mappings.properties.dense_field.model_settings.task_type": text_embedding } - - length: { "test-index.mappings.properties.dense_field": 3 } + - exists: test-index.mappings.properties.dense_field.model_settings + +--- +"Indexes dense vector document with bbq compatible model": + - requires: + cluster_features: "semantic_text.index_options" + reason: index_options introduced in 8.19.0 + + - do: + indices.create: + index: test-index-options-with-bbq + body: + settings: + index: + mapping: 
+ semantic_text: + use_legacy_format: false + mappings: + properties: + dense_field: + type: semantic_text + inference_id: dense-inference-id-compatible-with-bbq + + # Checks vector mapping is not updated until first doc arrives + - do: + indices.get_mapping: + index: test-index-options-with-bbq + + - match: { "test-index-options-with-bbq.mappings.properties.dense_field.type": semantic_text } + - match: { "test-index-options-with-bbq.mappings.properties.dense_field.inference_id": dense-inference-id-compatible-with-bbq } + - not_exists: test-index-options-with-bbq.mappings.properties.dense_field.index_options + - not_exists: test-index-options-with-bbq.mappings.properties.dense_field.model_settings + + - do: + index: + index: test-index-options-with-bbq + id: doc_2 + body: + dense_field: "these are not the droids you're looking for. He's free to go around" + _inference_fields.dense_field: + inference: + inference_id: dense-inference-id-compatible-with-bbq + model_settings: + task_type: text_embedding + dimensions: 64 + similarity: cosine + element_type: float + chunks: + dense_field: + - start_offset: 0 + end_offset: 44 + embeddings: [ 0.05, -0.03, -0.03, 0.06, 0.01, -0.02, 0.07, 0.02, -0.04, 0.03, 0.00, 0.05, -0.06, 0.04, -0.01, 0.02, -0.05, 0.01, 0.03, -0.02, 0.06, -0.04, 0.00, 0.05, -0.03, 0.02, 0.01, -0.01, 0.04, -0.06, 0.03, 0.02, -0.02, 0.06, -0.01, 0.00, 0.04, -0.05, 0.01, 0.03, -0.04, 0.02, -0.03, 0.05, -0.02, 0.01, 0.03, -0.06, 0.04, 0.00, -0.01, 0.06, -0.03, 0.02, 0.01, -0.04, 0.05, -0.01, 0.00, 0.04, -0.05, 0.02, 0.03, -0.02 ] + - start_offset: 44 + end_offset: 67 + embeddings: [ 0.05, -0.03, -0.03, 0.06, 0.01, -0.02, 0.07, 0.02, -0.04, 0.03, 0.00, 0.05, -0.06, 0.04, -0.01, 0.02, -0.05, 0.01, 0.03, -0.02, 0.06, -0.04, 0.00, 0.05, -0.03, 0.02, 0.01, -0.01, 0.04, -0.06, 0.03, 0.02, -0.02, 0.06, -0.01, 0.00, 0.04, -0.05, 0.01, 0.03, -0.04, 0.02, -0.03, 0.05, -0.02, 0.01, 0.03, -0.06, 0.04, 0.00, -0.01, 0.06, -0.03, 0.02, 0.01, -0.04, 0.05, -0.01, 0.00, 0.04, 
-0.05, 0.02, 0.03, -0.02 ] + + + # Checks mapping is updated when first doc arrives + - do: + indices.get_mapping: + index: test-index-options-with-bbq + + - match: { "test-index-options-with-bbq.mappings.properties.dense_field.type": semantic_text } + - match: { "test-index-options-with-bbq.mappings.properties.dense_field.inference_id": dense-inference-id-compatible-with-bbq } + - match: { "test-index-options-with-bbq.mappings.properties.dense_field.model_settings.task_type": text_embedding } + - not_exists: test-index-options-with-bbq.mappings.properties.dense_field.index_options --- "Field caps with text embedding": @@ -236,10 +351,10 @@ setup: dense_field: - start_offset: 0 end_offset: 44 - embeddings: [0.04673296958208084, -0.03237321600317955, -0.02543032355606556, 0.056035321205854416] + embeddings: [ 0.04673296958208084, -0.03237321600317955, -0.02543032355606556, 0.056035321205854416 ] - start_offset: 44 end_offset: 67 - embeddings: [0.00641461368650198, -0.0016253676731139421, -0.05126338079571724, 0.053438711911439896] + embeddings: [ 0.00641461368650198, -0.0016253676731139421, -0.05126338079571724, 0.053438711911439896 ] refresh: true - do: @@ -268,43 +383,43 @@ setup: --- "Cannot be used directly as a nested field": - - do: - catch: /semantic_text field \[nested.semantic\] cannot be nested/ - indices.create: - index: test-nested-index - body: - mappings: - properties: - nested: - type: nested - properties: - semantic: - type: semantic_text - inference_id: sparse-inference-id - another_field: - type: keyword + - do: + catch: /semantic_text field \[nested.semantic\] cannot be nested/ + indices.create: + index: test-nested-index + body: + mappings: + properties: + nested: + type: nested + properties: + semantic: + type: semantic_text + inference_id: sparse-inference-id + another_field: + type: keyword --- "Cannot be used as a nested field on nested objects": - - do: - catch: /semantic_text field \[nested.nested_object.semantic\] cannot be nested/ - 
indices.create: - index: test-nested-index - body: - mappings: - properties: - nested: - type: nested - properties: - nested_object: - type: object - properties: - semantic: - type: semantic_text - inference_id: sparse-inference-id - another_field: - type: keyword + - do: + catch: /semantic_text field \[nested.nested_object.semantic\] cannot be nested/ + indices.create: + index: test-nested-index + body: + mappings: + properties: + nested: + type: nested + properties: + nested_object: + type: object + properties: + semantic: + type: semantic_text + inference_id: sparse-inference-id + another_field: + type: keyword --- "Cannot be in an object field with subobjects disabled": @@ -339,11 +454,11 @@ setup: - requires: cluster_features: "semantic_text.always_emit_inference_id_fix" reason: always emit inference ID fix added in 8.17.0 - test_runner_features: [capabilities] + test_runner_features: [ capabilities ] capabilities: - method: GET path: /_inference - capabilities: [default_elser_2] + capabilities: [ default_elser_2 ] - do: indices.create: @@ -432,3 +547,289 @@ setup: - not_exists: fields.dense_field.inference.chunks.offset - not_exists: fields.dense_field.inference.chunks - not_exists: fields.dense_field.inference + +--- +"Users can set dense vector index options and index documents using those options": + - requires: + cluster_features: "semantic_text.index_options" + reason: Index options introduced in 8.19.0 + + - do: + indices.create: + index: test-index-options + body: + settings: + index: + mapping: + semantic_text: + use_legacy_format: false + mappings: + properties: + semantic_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: int8_hnsw + m: 20 + ef_construction: 100 + confidence_interval: 1.0 + + - do: + indices.get_mapping: + index: test-index-options + + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.type": "int8_hnsw" } + - match: { 
"test-index-options.mappings.properties.semantic_field.index_options.dense_vector.m": 20 } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.ef_construction": 100 } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.confidence_interval": 1.0 } + + - do: + index: + index: test-index-options + id: doc_1 + body: + semantic_field: "these are not the droids you're looking for. He's free to go around" + _inference_fields.semantic_field: + inference: + inference_id: dense-inference-id + model_settings: + task_type: text_embedding + dimensions: 4 + similarity: cosine + element_type: float + chunks: + semantic_field: + - start_offset: 0 + end_offset: 44 + embeddings: [ 0.04673296958208084, -0.03237321600317955, -0.02543032355606556, 0.056035321205854416 ] + - start_offset: 44 + end_offset: 67 + embeddings: [ 0.00641461368650198, -0.0016253676731139421, -0.05126338079571724, 0.053438711911439896 ] + + - do: + indices.get_mapping: + index: test-index-options + + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.type": int8_hnsw } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.m": 20 } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.ef_construction": 100 } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.confidence_interval": 1.0 } + +--- +"Specifying incompatible dense vector index options will fail": + - requires: + cluster_features: "semantic_text.index_options" + reason: Index options introduced in 8.19.0 + + - do: + catch: /unsupported parameters/ + indices.create: + index: test-incompatible-index-options + body: + settings: + index: + mapping: + semantic_text: + use_legacy_format: false + mappings: + properties: + semantic_field: + type: semantic_text + inference_id: dense-inference-id + index_options: 
+ dense_vector: + type: bbq_flat + ef_construction: 100 + +--- +"Specifying unsupported index option types will fail": + - requires: + cluster_features: "semantic_text.index_options" + reason: Index options introduced in 8.19.0 + + - do: + catch: /Unsupported index options type/ + indices.create: + index: test-invalid-index-options-dense + body: + settings: + index: + mapping: + semantic_text: + use_legacy_format: false + mappings: + properties: + semantic_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: foo + - do: + catch: bad_request + indices.create: + index: test-invalid-index-options-sparse + body: + settings: + index: + mapping: + semantic_text: + use_legacy_format: false + mappings: + properties: + semantic_field: + type: semantic_text + index_options: + sparse_vector: + type: int8_hnsw + +--- +"Index option type is required": + - requires: + cluster_features: "semantic_text.index_options" + reason: Index options introduced in 8.19.0 + + - do: + catch: /Required type/ + indices.create: + index: test-invalid-index-options-dense + body: + settings: + index: + mapping: + semantic_text: + use_legacy_format: false + mappings: + properties: + semantic_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + foo: bar + +--- +"Specifying index options requires model information": + - requires: + cluster_features: "semantic_text.index_options" + reason: Index options introduced in 8.19.0 + + - do: + catch: /Model settings must be set to validate index options/ + indices.create: + index: my-custom-semantic-index + body: + settings: + index: + mapping: + semantic_text: + use_legacy_format: false + mappings: + properties: + semantic_field: + type: semantic_text + inference_id: nonexistent-inference-id + index_options: + dense_vector: + type: int8_hnsw + + - match: { status: 400 } + + - do: + indices.create: + index: my-custom-semantic-index + body: + settings: + index: + 
mapping: + semantic_text: + use_legacy_format: false + mappings: + properties: + semantic_field: + type: semantic_text + inference_id: nonexistent-inference-id + + - do: + indices.get_mapping: + index: my-custom-semantic-index + + - match: { "my-custom-semantic-index.mappings.properties.semantic_field.type": semantic_text } + - match: { "my-custom-semantic-index.mappings.properties.semantic_field.inference_id": nonexistent-inference-id } + - not_exists: my-custom-semantic-index.mappings.properties.semantic_field.index_options + +--- +"Updating index options": + - requires: + cluster_features: "semantic_text.index_options" + reason: Index options introduced in 8.19.0 + + - do: + indices.create: + index: test-index-options + body: + settings: + index: + mapping: + semantic_text: + use_legacy_format: false + mappings: + properties: + semantic_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: int8_hnsw + m: 16 + ef_construction: 100 + confidence_interval: 1.0 + + - do: + indices.get_mapping: + index: test-index-options + + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.type": "int8_hnsw" } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.m": 16 } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.ef_construction": 100 } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.confidence_interval": 1.0 } + + - do: + indices.put_mapping: + index: test-index-options + body: + properties: + semantic_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: int8_hnsw + m: 20 + ef_construction: 90 + confidence_interval: 1.0 + + - do: + indices.get_mapping: + index: test-index-options + + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.type": "int8_hnsw" } + - 
match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.m": 20 } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.ef_construction": 90 } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.confidence_interval": 1.0 } + + - do: + catch: /Cannot update parameter \[index_options\]/ + indices.put_mapping: + index: test-index-options + body: + properties: + semantic_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: int8_flat + + - match: { status: 400 } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml index fa935ac450f88..b089d8c439330 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml @@ -3,6 +3,55 @@ setup: cluster_features: "gte_v8.15.0" reason: semantic_text introduced in 8.15.0 + - do: + inference.put: + task_type: sparse_embedding + inference_id: sparse-inference-id + body: > + { + "service": "test_service", + "service_settings": { + "model": "my_model", + "api_key": "abc64" + }, + "task_settings": { + } + } + + - do: + inference.put: + task_type: text_embedding + inference_id: dense-inference-id + body: > + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "dimensions": 4, + "similarity": "cosine", + "api_key": "abc64" + }, + "task_settings": { + } + } + + - do: + inference.put: + task_type: text_embedding + inference_id: dense-inference-id-compatible-with-bbq + body: > + { + "service": "text_embedding_test_service", + "service_settings": { 
+ "model": "my_model", + "dimensions": 64, + "similarity": "cosine", + "api_key": "abc64" + }, + "task_settings": { + } + } + - do: indices.create: index: test-index @@ -148,7 +197,7 @@ setup: - match: { "test-index.mappings.properties.dense_field.type": semantic_text } - match: { "test-index.mappings.properties.dense_field.inference_id": dense-inference-id } - - length: { "test-index.mappings.properties.dense_field": 2 } + - not_exists: test-index.mappings.properties.dense_field.model_settings - do: index: @@ -164,11 +213,17 @@ setup: dimensions: 4 similarity: cosine element_type: float + index_options: + dense_vector: + type: int8_hnsw + m: 16 + ef_construction: 100 chunks: - text: "these are not the droids you're looking for" - embeddings: [0.04673296958208084, -0.03237321600317955, -0.02543032355606556, 0.056035321205854416] + embeddings: [ 0.04673296958208084, -0.03237321600317955, -0.02543032355606556, 0.056035321205854416 ] - text: "He's free to go around" - embeddings: [0.00641461368650198, -0.0016253676731139421, -0.05126338079571724, 0.053438711911439896] + embeddings: [ 0.00641461368650198, -0.0016253676731139421, -0.05126338079571724, 0.053438711911439896 ] + refresh: true # Checks mapping is updated when first doc arrives - do: @@ -178,7 +233,69 @@ setup: - match: { "test-index.mappings.properties.dense_field.type": semantic_text } - match: { "test-index.mappings.properties.dense_field.inference_id": dense-inference-id } - match: { "test-index.mappings.properties.dense_field.model_settings.task_type": text_embedding } - - length: { "test-index.mappings.properties.dense_field": 3 } + - exists: test-index.mappings.properties.dense_field.model_settings + +--- +"Indexes dense vector document with bbq compatible model": + - requires: + cluster_features: "semantic_text.index_options" + reason: index_options introduced in 8.19.0 + + - do: + indices.create: + index: test-index-options-with-bbq + body: + settings: + index: + mapping: + semantic_text: + 
use_legacy_format: true + mappings: + properties: + dense_field: + type: semantic_text + inference_id: dense-inference-id-compatible-with-bbq + + # Checks vector mapping is not updated until first doc arrives + - do: + indices.get_mapping: + index: test-index-options-with-bbq + + - match: { "test-index-options-with-bbq.mappings.properties.dense_field.type": semantic_text } + - match: { "test-index-options-with-bbq.mappings.properties.dense_field.inference_id": dense-inference-id-compatible-with-bbq } + - not_exists: test-index-options-with-bbq.mappings.properties.dense_field.index_options + - not_exists: test-index-options-with-bbq.mappings.properties.dense_field.model_settings + + - do: + index: + index: test-index-options-with-bbq + id: doc_2 + body: + dense_field: + text: "these are not the droids you're looking for. He's free to go around" + inference: + inference_id: dense-inference-id-compatible-with-bbq + model_settings: + task_type: text_embedding + dimensions: 64 + similarity: cosine + element_type: float + chunks: + - text: "these are not the droids you're looking for" + embeddings: [ 0.05, -0.03, -0.03, 0.06, 0.01, -0.02, 0.07, 0.02, -0.04, 0.03, 0.00, 0.05, -0.06, 0.04, -0.01, 0.02, -0.05, 0.01, 0.03, -0.02, 0.06, -0.04, 0.00, 0.05, -0.03, 0.02, 0.01, -0.01, 0.04, -0.06, 0.03, 0.02, -0.02, 0.06, -0.01, 0.00, 0.04, -0.05, 0.01, 0.03, -0.04, 0.02, -0.03, 0.05, -0.02, 0.01, 0.03, -0.06, 0.04, 0.00, -0.01, 0.06, -0.03, 0.02, 0.01, -0.04, 0.05, -0.01, 0.00, 0.04, -0.05, 0.02, 0.03, -0.02 ] + - text: "He's free to go around" + embeddings: [ 0.05, -0.03, -0.03, 0.06, 0.01, -0.02, 0.07, 0.02, -0.04, 0.03, 0.00, 0.05, -0.06, 0.04, -0.01, 0.02, -0.05, 0.01, 0.03, -0.02, 0.06, -0.04, 0.00, 0.05, -0.03, 0.02, 0.01, -0.01, 0.04, -0.06, 0.03, 0.02, -0.02, 0.06, -0.01, 0.00, 0.04, -0.05, 0.01, 0.03, -0.04, 0.02, -0.03, 0.05, -0.02, 0.01, 0.03, -0.06, 0.04, 0.00, -0.01, 0.06, -0.03, 0.02, 0.01, -0.04, 0.05, -0.01, 0.00, 0.04, -0.05, 0.02, 0.03, -0.02 ] + refresh: true 
+ + # Checks mapping is updated when first doc arrives + - do: + indices.get_mapping: + index: test-index-options-with-bbq + + - match: { "test-index-options-with-bbq.mappings.properties.dense_field.type": semantic_text } + - match: { "test-index-options-with-bbq.mappings.properties.dense_field.inference_id": dense-inference-id-compatible-with-bbq } + - match: { "test-index-options-with-bbq.mappings.properties.dense_field.model_settings.task_type": text_embedding } + - not_exists: test-index-options-with-bbq.mappings.properties.dense_field.index_options --- "Field caps with text embedding": @@ -330,3 +447,292 @@ setup: - not_exists: fields.dense_field.inference.chunks.text - not_exists: fields.dense_field.inference.chunks - not_exists: fields.dense_field.inference +--- +"Users can set dense vector index options and index documents using those options": + - requires: + cluster_features: "semantic_text.index_options" + reason: Index options introduced in 8.19.0 + + - do: + indices.create: + index: test-index-options + body: + settings: + index: + mapping: + semantic_text: + use_legacy_format: true + mappings: + properties: + semantic_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: int8_hnsw + m: 20 + ef_construction: 100 + confidence_interval: 1.0 + + - do: + indices.get_mapping: + index: test-index-options + + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.type": "int8_hnsw" } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.m": 20 } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.ef_construction": 100 } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.confidence_interval": 1.0 } + + - do: + index: + index: test-index-options + id: doc_1 + body: + semantic_field: + text: "these are not the droids you're looking for.
He's free to go around" + inference: + inference_id: dense-inference-id + model_settings: + task_type: text_embedding + dimensions: 4 + similarity: cosine + element_type: float + index_options: + dense_vector: + type: int8_hnsw + m: 20 + ef_construction: 100 + confidence_interval: 1.0 + chunks: + - text: "these are not the droids you're looking for" + embeddings: [ 0.04673296958208084, -0.03237321600317955, -0.02543032355606556, 0.056035321205854416 ] + - text: "He's free to go around" + embeddings: [ 0.00641461368650198, -0.0016253676731139421, -0.05126338079571724, 0.053438711911439896 ] + refresh: true + + - do: + indices.get_mapping: + index: test-index-options + + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.type": "int8_hnsw" } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.m": 20 } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.ef_construction": 100 } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.confidence_interval": 1.0 } + +--- +"Specifying incompatible dense vector index options will fail": + - requires: + cluster_features: "semantic_text.index_options" + reason: Index options introduced in 8.19.0 + + - do: + catch: /unsupported parameters/ + indices.create: + index: test-incompatible-index-options + body: + settings: + index: + mapping: + semantic_text: + use_legacy_format: true + mappings: + properties: + semantic_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: bbq_flat + ef_construction: 100 + +--- +"Specifying unsupported index option types will fail": + - requires: + cluster_features: "semantic_text.index_options" + reason: Index options introduced in 8.19.0 + + - do: + catch: /Unsupported index options type/ + indices.create: + index: test-invalid-index-options-dense + body: + settings: + index: + 
mapping: + semantic_text: + use_legacy_format: true + mappings: + properties: + semantic_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: foo + - do: + catch: bad_request + indices.create: + index: test-invalid-index-options-sparse + body: + settings: + index: + mapping: + semantic_text: + use_legacy_format: true + mappings: + properties: + semantic_field: + type: semantic_text + index_options: + sparse_vector: + type: int8_hnsw + +--- +"Index option type is required": + - requires: + cluster_features: "semantic_text.index_options" + reason: Index options introduced in 8.19.0 + + - do: + catch: /Required type/ + indices.create: + index: test-invalid-index-options-dense + body: + settings: + index: + mapping: + semantic_text: + use_legacy_format: true + mappings: + properties: + semantic_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + foo: bar + +--- +"Specifying index options requires model information": + - requires: + cluster_features: "semantic_text.index_options" + reason: Index options introduced in 8.19.0 + + - do: + catch: /Model settings must be set to validate index options/ + indices.create: + index: my-custom-semantic-index + body: + settings: + index: + mapping: + semantic_text: + use_legacy_format: true + mappings: + properties: + semantic_field: + type: semantic_text + inference_id: nonexistent-inference-id + index_options: + dense_vector: + type: int8_hnsw + + - match: { status: 400 } + + - do: + indices.create: + index: my-custom-semantic-index + body: + settings: + index: + mapping: + semantic_text: + use_legacy_format: true + mappings: + properties: + semantic_field: + type: semantic_text + inference_id: nonexistent-inference-id + + - do: + indices.get_mapping: + index: my-custom-semantic-index + + - match: { "my-custom-semantic-index.mappings.properties.semantic_field.type": semantic_text } + - match: { 
"my-custom-semantic-index.mappings.properties.semantic_field.inference_id": nonexistent-inference-id } + - not_exists: my-custom-semantic-index.mappings.properties.semantic_field.index_options + +--- +"Updating index options": + - requires: + cluster_features: "semantic_text.index_options" + reason: Index options introduced in 8.19.0 + + - do: + indices.create: + index: test-index-options + body: + settings: + index: + mapping: + semantic_text: + use_legacy_format: true + mappings: + properties: + semantic_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: int8_hnsw + m: 16 + ef_construction: 100 + confidence_interval: 1.0 + + - do: + indices.get_mapping: + index: test-index-options + + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.type": "int8_hnsw" } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.m": 16 } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.ef_construction": 100 } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.confidence_interval": 1.0 } + + - do: + indices.put_mapping: + index: test-index-options + body: + properties: + semantic_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: int8_hnsw + m: 20 + ef_construction: 90 + confidence_interval: 1.0 + + - do: + indices.get_mapping: + index: test-index-options + + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.type": "int8_hnsw" } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.m": 20 } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.ef_construction": 90 } + - match: { "test-index-options.mappings.properties.semantic_field.index_options.dense_vector.confidence_interval": 1.0 } 
+ + - do: + catch: /Cannot update parameter \[index_options\]/ + indices.put_mapping: + index: test-index-options + body: + properties: + semantic_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: int8_flat + + - match: { status: 400 }