From 4be9290a977a602e6c8d4a13fd2846d51bbaa71e Mon Sep 17 00:00:00 2001 From: "Mark J. Hoy" Date: Fri, 6 Jun 2025 15:02:16 -0400 Subject: [PATCH 01/37] Initial checkin of refactored index_options code --- .../org/elasticsearch/TransportVersions.java | 2 + .../elasticsearch/index/IndexVersions.java | 2 + .../vectors/SparseVectorFieldMapper.java | 192 +++++++++++++++++- .../mapper/vectors/TokenPruningConfig.java | 43 ++++ .../ml/search/SparseVectorQueryBuilder.java | 45 +++- 5 files changed, 268 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index d83a4992f97b0..595054c1184e7 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -191,6 +191,7 @@ static TransportVersion def(int id) { public static final TransportVersion ILM_ADD_SKIP_SETTING_8_19 = def(8_841_0_43); public static final TransportVersion ESQL_REGEX_MATCH_WITH_CASE_INSENSITIVITY_8_19 = def(8_841_0_44); public static final TransportVersion ESQL_QUERY_PLANNING_DURATION_8_19 = def(8_841_0_45); + public static final TransportVersion SPARSE_VECTOR_FIELD_PRUNING_OPTIONS_8_19 = def(8_842_0_45); public static final TransportVersion V_9_0_0 = def(9_000_0_09); public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_1 = def(9_000_0_10); public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_2 = def(9_000_0_11); @@ -286,6 +287,7 @@ static TransportVersion def(int id) { public static final TransportVersion ILM_ADD_SKIP_SETTING = def(9_089_0_00); public static final TransportVersion ML_INFERENCE_MISTRAL_CHAT_COMPLETION_ADDED = def(9_090_0_00); public static final TransportVersion IDP_CUSTOM_SAML_ATTRIBUTES_ALLOW_LIST = def(9_091_0_00); + public static final TransportVersion SPARSE_VECTOR_FIELD_PRUNING_OPTIONS = def(9_092_0_00); /* * STOP! READ THIS FIRST! 
No, really, diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index f32d4d7a2a302..b3f92e67c25ea 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -142,6 +142,7 @@ private static Version parseUnchecked(String version) { public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ_BACKPORT_8_X = def(8_530_0_00, Version.LUCENE_9_12_1); public static final IndexVersion SEMANTIC_TEXT_DEFAULTS_TO_BBQ_BACKPORT_8_X = def(8_531_0_00, Version.LUCENE_9_12_1); public static final IndexVersion INDEX_INT_SORT_INT_TYPE_8_19 = def(8_532_0_00, Version.LUCENE_9_12_1); + public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X = def(8_533_0_00, Version.LUCENE_9_12_1); public static final IndexVersion UPGRADE_TO_LUCENE_10_0_0 = def(9_000_0_00, Version.LUCENE_10_0_0); public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = def(9_001_0_00, Version.LUCENE_10_0_0); public static final IndexVersion TIME_BASED_K_ORDERED_DOC_ID = def(9_002_0_00, Version.LUCENE_10_0_0); @@ -171,6 +172,7 @@ private static Version parseUnchecked(String version) { public static final IndexVersion DEFAULT_TO_ACORN_HNSW_FILTER_HEURISTIC = def(9_026_0_00, Version.LUCENE_10_2_1); public static final IndexVersion SEQ_NO_WITHOUT_POINTS = def(9_027_0_00, Version.LUCENE_10_2_1); public static final IndexVersion INDEX_INT_SORT_INT_TYPE = def(9_028_0_00, Version.LUCENE_10_2_1); + public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT = def(9_029_0_00, Version.LUCENE_10_2_1); /* * STOP! READ THIS FIRST! 
No, really, diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index 0aeb3495608d6..cb3ee47b962d0 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -22,6 +22,9 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.logging.DeprecationCategory; import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.analysis.NamedAnalyzer; @@ -31,6 +34,7 @@ import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperBuilderContext; +import org.elasticsearch.index.mapper.MappingParserContext; import org.elasticsearch.index.mapper.SourceLoader; import org.elasticsearch.index.mapper.SourceValueFetcher; import org.elasticsearch.index.mapper.TextSearchInfo; @@ -40,17 +44,27 @@ import org.elasticsearch.inference.WeightedTokensUtils; import org.elasticsearch.search.fetch.StoredFieldsSpec; import org.elasticsearch.search.lookup.Source; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.DeprecationHandler; +import org.elasticsearch.xcontent.NamedXContentRegistry; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentParser.Token; +import org.elasticsearch.xcontent.XContentType; +import 
org.elasticsearch.xcontent.support.MapXContentParser; import java.io.IOException; import java.io.UncheckedIOException; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.stream.Stream; import static org.elasticsearch.index.query.AbstractQueryBuilder.DEFAULT_BOOST; +import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; /** * A {@link FieldMapper} that exposes Lucene's {@link FeatureField} as a sparse @@ -59,6 +73,7 @@ public class SparseVectorFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "sparse_vector"; + public static final String SPARSE_VECTOR_INDEX_OPTIONS = "index_options"; static final String ERROR_MESSAGE_7X = "[sparse_vector] field type in old 7.x indices is allowed to " + "contain [sparse_vector] fields, but they cannot be indexed or searched."; @@ -67,6 +82,10 @@ public class SparseVectorFieldMapper extends FieldMapper { static final IndexVersion NEW_SPARSE_VECTOR_INDEX_VERSION = IndexVersions.NEW_SPARSE_VECTOR; static final IndexVersion SPARSE_VECTOR_IN_FIELD_NAMES_INDEX_VERSION = IndexVersions.SPARSE_VECTOR_IN_FIELD_NAMES_SUPPORT; + static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION = IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT; + static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X = IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X; + + public static final NodeFeature SPARSE_VECTOR_INDEX_OPTIONS_FEATURE = new NodeFeature("sparse_vector.index_options_supported"); private static SparseVectorFieldMapper toType(FieldMapper in) { return (SparseVectorFieldMapper) in; @@ -75,6 +94,15 @@ private static SparseVectorFieldMapper toType(FieldMapper in) { public static class Builder extends FieldMapper.Builder { private final Parameter stored = Parameter.storeParam(m -> toType(m).fieldType().isStored(), false); private final Parameter> meta = 
Parameter.metaParam(); + private final Parameter indexOptions = new Parameter<>( + SPARSE_VECTOR_INDEX_OPTIONS, + true, + () -> null, + (n, c, o) -> parseIndexOptions(c, o), + m -> toType(m).fieldType().indexOptions, + XContentBuilder::field, + Objects::toString + ).acceptsNull(); public Builder(String name) { super(name); @@ -87,19 +115,54 @@ public Builder setStored(boolean value) { @Override protected Parameter[] getParameters() { - return new Parameter[] { stored, meta }; + return new Parameter[] { stored, meta, indexOptions }; } @Override public SparseVectorFieldMapper build(MapperBuilderContext context) { return new SparseVectorFieldMapper( leafName(), - new SparseVectorFieldType(context.buildFullName(leafName()), stored.getValue(), meta.getValue()), + new SparseVectorFieldType(context.buildFullName(leafName()), stored.getValue(), meta.getValue(), indexOptions.getValue()), builderParams(this, context) ); } } + public IndexOptions getIndexOptions() { + return fieldType().getIndexOptions(); + } + + private static final ConstructingObjectParser INDEX_OPTIONS_PARSER = new ConstructingObjectParser<>( + SPARSE_VECTOR_INDEX_OPTIONS, + args -> new IndexOptions((Boolean) args[0], (TokenPruningConfig) args[1]) + ); + + static { + INDEX_OPTIONS_PARSER.declareBoolean(optionalConstructorArg(), IndexOptions.PRUNE_FIELD_NAME); + INDEX_OPTIONS_PARSER.declareObject(optionalConstructorArg(), TokenPruningConfig.PARSER, IndexOptions.PRUNING_CONFIG_FIELD_NAME); + } + + private static SparseVectorFieldMapper.IndexOptions parseIndexOptions(MappingParserContext context, Object propNode) { + if (propNode == null) { + return null; + } + + Map indexOptionsMap = XContentMapValues.nodeMapValue(propNode, SPARSE_VECTOR_INDEX_OPTIONS); + + XContentParser parser = new MapXContentParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.IGNORE_DEPRECATIONS, + indexOptionsMap, + XContentType.JSON + ); + + try { + return INDEX_OPTIONS_PARSER.parse(parser, null); + } catch (IOException e) { + 
throw new UncheckedIOException(e); + } + } + public static final TypeParser PARSER = new TypeParser((n, c) -> { if (c.indexVersionCreated().before(PREVIOUS_SPARSE_VECTOR_INDEX_VERSION)) { deprecationLogger.warn(DeprecationCategory.MAPPINGS, "sparse_vector", ERROR_MESSAGE_7X); @@ -111,9 +174,19 @@ public SparseVectorFieldMapper build(MapperBuilderContext context) { }, notInMultiFields(CONTENT_TYPE)); public static final class SparseVectorFieldType extends MappedFieldType { + private final IndexOptions indexOptions; public SparseVectorFieldType(String name, boolean isStored, Map meta) { + this(name, isStored, meta, null); + } + + public SparseVectorFieldType(String name, boolean isStored, Map meta, @Nullable SparseVectorFieldMapper.IndexOptions indexOptions) { super(name, true, isStored, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); + this.indexOptions = indexOptions; + } + + public IndexOptions getIndexOptions() { + return indexOptions; } @Override @@ -155,14 +228,54 @@ public Query finalizeSparseVectorQuery( SearchExecutionContext context, String fieldName, List queryVectors, - boolean shouldPruneTokens, - TokenPruningConfig tokenPruningConfig + Boolean shouldPruneTokensFromQuery, + TokenPruningConfig tokenPruningConfigFromQuery ) throws IOException { - return (shouldPruneTokens) - ? WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, tokenPruningConfig, queryVectors, this, context) + TokenPruningConfig pruningConfig = null; + + if (shouldPruneTokensFromQuery != null) { + // if this is not null, the query is overriding the index config + pruningConfig = shouldPruneTokensFromQuery ? 
tokenPruningConfigFromQuery : null; + } else { + // check and see if we explicitly do not prune in the index_options + boolean explicitlyDoNotPrune = this.indexOptions != null + && this.indexOptions.prune != null + && this.indexOptions.prune == false; + + if (explicitlyDoNotPrune == false) { + // get the explicit pruning config from the index_options if available + pruningConfig = this.indexOptions != null ? this.indexOptions.pruningConfig : null; + + // if we're still null, set the default based on the index version + // newer index versions default to true, while older is false + pruningConfig = pruningConfig == null ? getDefaultPruningConfig(context) : pruningConfig; + } + } + + return (pruningConfig != null) + ? WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, pruningConfig, queryVectors, this, context) : WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, queryVectors, this, context); } + private TokenPruningConfig getDefaultPruningConfig(SearchExecutionContext context) { + IndexVersion indexVersion = context.indexVersionCreated(); + + if (indexVersion.after(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION)) { + // default pruning for 9.1.0+ is true for this index + return new TokenPruningConfig(); + } + + if (indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0)) + { + // default pruning for 8.19.0+ is true for this index + return new TokenPruningConfig(); + } + + // the index version is before we added index_options support + // so pruning is off by default + return null; + } + private static String indexedValueForSearch(Object value) { if (value instanceof BytesRef) { return ((BytesRef) value).utf8ToString(); @@ -378,4 +491,71 @@ public void reset() { } } + public static class IndexOptions implements ToXContent { + public static final ParseField PRUNE_FIELD_NAME = new ParseField("prune"); + public static final ParseField PRUNING_CONFIG_FIELD_NAME = new ParseField("pruning_config"); + + final 
Boolean prune; + final TokenPruningConfig pruningConfig; + + IndexOptions(@Nullable Boolean prune, @Nullable TokenPruningConfig pruningConfig) { + if (pruningConfig != null && (prune == null || prune == false)) { + throw new IllegalArgumentException( + "[" + + SPARSE_VECTOR_INDEX_OPTIONS + + "] field [" + + PRUNING_CONFIG_FIELD_NAME.getPreferredName() + + "] should only be set if [" + + PRUNE_FIELD_NAME.getPreferredName() + + "] is set to true" + ); + } + + this.prune = prune; + this.pruningConfig = pruningConfig; + } + + public Boolean getPrune() { + return prune; + } + + public TokenPruningConfig getPruningConfig() { + return pruningConfig; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + + if (prune != null) { + builder.field(PRUNE_FIELD_NAME.getPreferredName(), prune); + } + if (pruningConfig != null) { + builder.field(PRUNING_CONFIG_FIELD_NAME.getPreferredName(), pruningConfig); + } + + builder.endObject(); + return builder; + } + + @Override + public final boolean equals(Object other) { + if (other == this) { + return true; + } + + if (other == null || getClass() != other.getClass()) { + return false; + } + + IndexOptions otherAsIndexOptions = (IndexOptions) other; + return Objects.equals(prune, otherAsIndexOptions.prune) && Objects.equals(pruningConfig, otherAsIndexOptions.pruningConfig); + } + + @Override + public final int hashCode() { + return Objects.hash(prune, pruningConfig); + } + } + } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/TokenPruningConfig.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/TokenPruningConfig.java index 6deeae327890b..e83dd2be30275 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/TokenPruningConfig.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/TokenPruningConfig.java @@ -14,16 +14,25 @@ import org.elasticsearch.common.io.stream.StreamOutput; 
import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.DeprecationHandler; +import org.elasticsearch.xcontent.NamedXContentRegistry; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.ToXContentObject; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xcontent.support.MapXContentParser; import java.io.IOException; +import java.io.UncheckedIOException; import java.util.Locale; +import java.util.Map; import java.util.Objects; import java.util.Set; +import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; + public class TokenPruningConfig implements Writeable, ToXContentObject { public static final String PRUNING_CONFIG_FIELD = "pruning_config"; public static final ParseField TOKENS_FREQ_RATIO_THRESHOLD = new ParseField("tokens_freq_ratio_threshold"); @@ -176,4 +185,38 @@ public static TokenPruningConfig fromXContent(XContentParser parser) throws IOEx } return new TokenPruningConfig(ratioThreshold, weightThreshold, onlyScorePrunedTokens); } + + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + PRUNING_CONFIG_FIELD, + args -> new TokenPruningConfig( + args[0] == null ? DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD : (Float) args[0], + args[1] == null ? 
DEFAULT_TOKENS_WEIGHT_THRESHOLD : (Float) args[1], + args[2] != null && (Boolean) args[2] + ) + ); + + static { + PARSER.declareFloat(optionalConstructorArg(), TOKENS_FREQ_RATIO_THRESHOLD); + PARSER.declareFloat(optionalConstructorArg(), TOKENS_WEIGHT_THRESHOLD); + PARSER.declareBoolean(optionalConstructorArg(), ONLY_SCORE_PRUNED_TOKENS_FIELD); + } + + public static TokenPruningConfig parseFromMap(Map pruningConfigMap) { + if (pruningConfigMap == null) { + return null; + } + + try { + XContentParser parser = new MapXContentParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.IGNORE_DEPRECATIONS, + pruningConfigMap, + XContentType.JSON + ); + + return PARSER.parse(parser, null); + } catch (IOException ioEx) { + throw new UncheckedIOException(ioEx); + } + } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java index f5815a3bfde23..bad85bb0a357a 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java @@ -17,6 +17,8 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.core.Nullable; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper; import org.elasticsearch.index.mapper.vectors.TokenPruningConfig; @@ -60,11 +62,14 @@ public class SparseVectorQueryBuilder extends AbstractQueryBuilder queryVectors; private final String inferenceId; private final String query; - private final boolean shouldPruneTokens; + private final Boolean shouldPruneTokens; private final SetOnce 
weightedTokensSupplier; @@ -84,13 +89,11 @@ public SparseVectorQueryBuilder( @Nullable TokenPruningConfig tokenPruningConfig ) { this.fieldName = Objects.requireNonNull(fieldName, "[" + NAME + "] requires a [" + FIELD_FIELD.getPreferredName() + "]"); - this.shouldPruneTokens = (shouldPruneTokens != null ? shouldPruneTokens : DEFAULT_PRUNE); + this.shouldPruneTokens = shouldPruneTokens; this.queryVectors = queryVectors; this.inferenceId = inferenceId; this.query = query; - this.tokenPruningConfig = (tokenPruningConfig != null - ? tokenPruningConfig - : (this.shouldPruneTokens ? new TokenPruningConfig() : null)); + this.tokenPruningConfig = tokenPruningConfig; this.weightedTokensSupplier = null; // Preserve BWC error messaging @@ -127,7 +130,15 @@ public SparseVectorQueryBuilder( public SparseVectorQueryBuilder(StreamInput in) throws IOException { super(in); this.fieldName = in.readString(); - this.shouldPruneTokens = in.readBoolean(); + + if (in.getTransportVersion().isPatchFrom(SPARSE_VECTOR_FIELD_PRUNING_OPTIONS_8_19) + || in.getTransportVersion().onOrAfter(SPARSE_VECTOR_FIELD_PRUNING_OPTIONS) + ) { + this.shouldPruneTokens = in.readOptionalBoolean(); + } else { + this.shouldPruneTokens = in.readBoolean(); + } + this.queryVectors = in.readOptionalCollectionAsList(WeightedToken::new); this.inferenceId = in.readOptionalString(); this.query = in.readOptionalString(); @@ -161,7 +172,7 @@ public String getQuery() { return query; } - public boolean shouldPruneTokens() { + public Boolean shouldPruneTokens() { return shouldPruneTokens; } @@ -176,7 +187,15 @@ protected void doWriteTo(StreamOutput out) throws IOException { } out.writeString(fieldName); - out.writeBoolean(shouldPruneTokens); + + if (out.getTransportVersion().isPatchFrom(SPARSE_VECTOR_FIELD_PRUNING_OPTIONS_8_19) + || out.getTransportVersion().onOrAfter(SPARSE_VECTOR_FIELD_PRUNING_OPTIONS) + ) { + out.writeOptionalBoolean(shouldPruneTokens); + } else { + out.writeBoolean(shouldPruneTokens); + } + 
out.writeOptionalCollection(queryVectors); out.writeOptionalString(inferenceId); out.writeOptionalString(query); @@ -199,7 +218,9 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep } builder.field(QUERY_FIELD.getPreferredName(), query); } + if (shouldPruneTokens != null) { builder.field(PRUNE_FIELD.getPreferredName(), shouldPruneTokens); + } if (tokenPruningConfig != null) { builder.field(PRUNING_CONFIG_FIELD.getPreferredName(), tokenPruningConfig); } @@ -231,7 +252,9 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException { protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) { if (queryVectors != null) { return this; - } else if (weightedTokensSupplier != null) { + } + + if (weightedTokensSupplier != null) { TextExpansionResults textExpansionResults = weightedTokensSupplier.get(); if (textExpansionResults == null) { return this; // No results yet @@ -245,7 +268,9 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) { shouldPruneTokens, tokenPruningConfig ); - } else if (inferenceId == null) { + } + + if (inferenceId == null) { // Edge case, where inference_id was not specified in the request, // but we did not intercept this and rewrite to a query o field with // pre-configured inference. So we trap here and output a nicer error message. 
From f0f02794a1dc8d87c64ead8660c4908c8f8f5004 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 6 Jun 2025 19:11:28 +0000 Subject: [PATCH 02/37] [CI] Auto commit changes from spotless --- .../mapper/vectors/SparseVectorFieldMapper.java | 13 +++++++++---- .../core/ml/search/SparseVectorQueryBuilder.java | 10 +++------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index cb3ee47b962d0..219f68c677a83 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -83,7 +83,8 @@ public class SparseVectorFieldMapper extends FieldMapper { static final IndexVersion NEW_SPARSE_VECTOR_INDEX_VERSION = IndexVersions.NEW_SPARSE_VECTOR; static final IndexVersion SPARSE_VECTOR_IN_FIELD_NAMES_INDEX_VERSION = IndexVersions.SPARSE_VECTOR_IN_FIELD_NAMES_SUPPORT; static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION = IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT; - static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X = IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X; + static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X = + IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X; public static final NodeFeature SPARSE_VECTOR_INDEX_OPTIONS_FEATURE = new NodeFeature("sparse_vector.index_options_supported"); @@ -180,7 +181,12 @@ public SparseVectorFieldType(String name, boolean isStored, Map this(name, isStored, meta, null); } - public SparseVectorFieldType(String name, boolean isStored, Map meta, @Nullable SparseVectorFieldMapper.IndexOptions indexOptions) { + public SparseVectorFieldType( + String name, + boolean isStored, + Map meta, + 
@Nullable SparseVectorFieldMapper.IndexOptions indexOptions + ) { super(name, true, isStored, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); this.indexOptions = indexOptions; } @@ -265,8 +271,7 @@ private TokenPruningConfig getDefaultPruningConfig(SearchExecutionContext contex return new TokenPruningConfig(); } - if (indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0)) - { + if (indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0)) { // default pruning for 8.19.0+ is true for this index return new TokenPruningConfig(); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java index bad85bb0a357a..cc67bc28d675e 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java @@ -17,8 +17,6 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.core.Nullable; -import org.elasticsearch.index.IndexVersion; -import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper; import org.elasticsearch.index.mapper.vectors.TokenPruningConfig; @@ -132,8 +130,7 @@ public SparseVectorQueryBuilder(StreamInput in) throws IOException { this.fieldName = in.readString(); if (in.getTransportVersion().isPatchFrom(SPARSE_VECTOR_FIELD_PRUNING_OPTIONS_8_19) - || in.getTransportVersion().onOrAfter(SPARSE_VECTOR_FIELD_PRUNING_OPTIONS) - ) { + || in.getTransportVersion().onOrAfter(SPARSE_VECTOR_FIELD_PRUNING_OPTIONS)) { this.shouldPruneTokens = in.readOptionalBoolean(); } 
else { this.shouldPruneTokens = in.readBoolean(); @@ -189,8 +186,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { out.writeString(fieldName); if (out.getTransportVersion().isPatchFrom(SPARSE_VECTOR_FIELD_PRUNING_OPTIONS_8_19) - || out.getTransportVersion().onOrAfter(SPARSE_VECTOR_FIELD_PRUNING_OPTIONS) - ) { + || out.getTransportVersion().onOrAfter(SPARSE_VECTOR_FIELD_PRUNING_OPTIONS)) { out.writeOptionalBoolean(shouldPruneTokens); } else { out.writeBoolean(shouldPruneTokens); @@ -219,7 +215,7 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep builder.field(QUERY_FIELD.getPreferredName(), query); } if (shouldPruneTokens != null) { - builder.field(PRUNE_FIELD.getPreferredName(), shouldPruneTokens); + builder.field(PRUNE_FIELD.getPreferredName(), shouldPruneTokens); } if (tokenPruningConfig != null) { builder.field(PRUNING_CONFIG_FIELD.getPreferredName(), tokenPruningConfig); From 3281cc23bd6f78a4c6bdd342b6a96451404ab665 Mon Sep 17 00:00:00 2001 From: "Mark J. 
Hoy" Date: Mon, 9 Jun 2025 09:54:41 -0400 Subject: [PATCH 03/37] initial unit testing --- .../vectors/SparseVectorFieldMapper.java | 2 +- .../vectors/SparseVectorFieldMapperTests.java | 238 ++++++++++++++++++ 2 files changed, 239 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index 219f68c677a83..086d98cc7cd7c 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -266,7 +266,7 @@ public Query finalizeSparseVectorQuery( private TokenPruningConfig getDefaultPruningConfig(SearchExecutionContext context) { IndexVersion indexVersion = context.indexVersionCreated(); - if (indexVersion.after(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION)) { + if (indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION)) { // default pruning for 9.1.0+ is true for this index return new TokenPruningConfig(); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index b2379ba579204..912963fba8bd1 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -14,11 +14,16 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReader; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import 
org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.common.Strings; import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.core.CheckedConsumer; +import org.elasticsearch.core.Nullable; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.mapper.DocumentMapper; @@ -28,7 +33,10 @@ import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperTestCase; import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.inference.WeightedToken; import org.elasticsearch.search.lookup.Source; +import org.elasticsearch.search.vectors.SparseVectorQueryWrapper; import org.elasticsearch.test.index.IndexVersionUtils; import org.elasticsearch.xcontent.XContentBuilder; import org.hamcrest.Matchers; @@ -36,6 +44,8 @@ import java.io.IOException; import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -67,6 +77,30 @@ protected void minimalMapping(XContentBuilder b) throws IOException { b.field("type", "sparse_vector"); } + protected void mappingWithIndexOptions(XContentBuilder b) throws IOException { + b.field("type", "sparse_vector"); + b.startObject("index_options"); + { + b.field("prune", true); + b.startObject("pruning_config"); + { + b.field("tokens_freq_ratio_threshold", TokenPruningConfig.DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD); + b.field("tokens_weight_threshold", TokenPruningConfig.DEFAULT_TOKENS_WEIGHT_THRESHOLD); + } + b.endObject(); + } + b.endObject(); + } + + protected void mappingWithIndexOptionsPruneFalse(XContentBuilder b) throws IOException { + b.field("type", "sparse_vector"); + b.startObject("index_options"); + { + b.field("prune", false); + } + b.endObject(); + } + @Override protected boolean supportsStoredFields() { return 
false; @@ -318,6 +352,210 @@ public void testSparseVectorUnsupportedIndex() throws Exception { assertThat(e.getMessage(), containsString(SparseVectorFieldMapper.ERROR_MESSAGE_8X)); } + private void withSearchExecutionContext(MapperService mapperService, CheckedConsumer consumer) + throws IOException { + for (boolean store : new boolean[] { true, false }) { + var mapper = mapperService.documentMapper(); + try (Directory directory = newDirectory()) { + RandomIndexWriter iw = new RandomIndexWriter(random(), directory); + var sourceToParse = source(this::writeField); + ParsedDocument doc1 = mapper.parse(sourceToParse); + iw.addDocument(doc1.rootDoc()); + iw.close(); + + try (DirectoryReader reader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) { + LeafReader leafReader = getOnlyLeafReader(reader); + var searchContext = createSearchExecutionContext(mapperService, new IndexSearcher(leafReader)); + consumer.accept(searchContext); + } + } + } + } + + public void testTypeQueryFinalizationWithRandomOptions() throws Exception { + for (int i = 0; i < 20; i++) { + runTestTypeQueryFinalization(randomBoolean(), randomBoolean(), randomBoolean(), randomBoolean(), randomBoolean()); + } + } + + public void testTypeQueryFinalizationDefaultsCurrentVersion() throws Exception { + IndexVersion version = IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT; + MapperService mapperService = createMapperService(version, fieldMapping(this::minimalMapping)); + // query should be pruned by default on newer index versions + performTypeQueryFinalizationTest(mapperService, null, null, null, true); + } + + public void testTypeQueryFinalizationDefaultsPreviousVersion() throws Exception { + IndexVersion version = IndexVersionUtils.randomVersionBetween( + random(), + IndexVersions.UPGRADE_TO_LUCENE_10_2_1, + IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT + ); + MapperService mapperService = createMapperService(version, fieldMapping(this::minimalMapping)); + // query 
should _not_ be pruned by default on older index versions + performTypeQueryFinalizationTest(mapperService, null, null, null, false); + } + + public void testTypeQueryFinalizationWithIndexExplicit() throws Exception { + IndexVersion version = IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT; + MapperService mapperService = createMapperService(version, fieldMapping(this::mappingWithIndexOptions)); + // query should be pruned via explicit index options + performTypeQueryFinalizationTest( + mapperService, + new SparseVectorFieldMapper.IndexOptions(true, new TokenPruningConfig()), + null, + null, + true + ); + } + + public void testTypeQueryFinalizationWithIndexExplicitDoNotPrune() throws Exception { + IndexVersion version = IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT; + MapperService mapperService = createMapperService(version, fieldMapping(this::mappingWithIndexOptionsPruneFalse)); + // query should be pruned via explicit index options + performTypeQueryFinalizationTest(mapperService, new SparseVectorFieldMapper.IndexOptions(false, null), null, null, false); + } + + private void performTypeQueryFinalizationTest( + MapperService mapperService, + SparseVectorFieldMapper.IndexOptions indexOptions, + @Nullable Boolean queryPrune, + @Nullable TokenPruningConfig queryTokenPruningConfig, + boolean queryShouldBePruned + ) throws IOException { + withSearchExecutionContext(mapperService, (context) -> { + SparseVectorFieldMapper.SparseVectorFieldType ft = new SparseVectorFieldMapper.SparseVectorFieldType( + "field", + false, + Collections.emptyMap(), + indexOptions + ); + Query finalizedQuery = ft.finalizeSparseVectorQuery(context, "field", QUERY_VECTORS, queryPrune, queryTokenPruningConfig); + + if (queryShouldBePruned) { + assertQueryWasPruned(finalizedQuery); + } else { + assertQueryWasNotPruned(finalizedQuery); + } + }); + } + + private void assertQueryWasPruned(Query query) { + assertQueryHasClauseCount(query, 0); + } + + private void 
assertQueryWasNotPruned(Query query) { + assertQueryHasClauseCount(query, QUERY_VECTORS.size()); + } + + private void assertQueryHasClauseCount(Query query, int clauseCount) { + SparseVectorQueryWrapper queryWrapper = (SparseVectorQueryWrapper) query; + var termsQuery = queryWrapper.getTermsQuery(); + assertNotNull(termsQuery); + var booleanQuery = (BooleanQuery) termsQuery; + Collection clauses = booleanQuery.getClauses(BooleanClause.Occur.SHOULD); + assertThat(clauses.size(), equalTo(clauseCount)); + } + + private void runTestTypeQueryFinalization( + boolean usePreviousIndex, + boolean useIndexOptionsDefaults, + boolean explicitIndexOptionsDoNotPrune, + boolean queryOverridesPruning, + boolean queryOverrideExplicitFalse + ) throws IOException { + IndexVersion version = usePreviousIndex + ? IndexVersionUtils.randomVersionBetween( + random(), + IndexVersions.UPGRADE_TO_LUCENE_10_2_1, + IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT + ) + : IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT; + + MapperService mapperService = getMapperServiceForRandomizedFinalizationTest( + version, + useIndexOptionsDefaults, + explicitIndexOptionsDoNotPrune + ); + + SparseVectorFieldMapper.IndexOptions indexOptions = getIndexOptionsQueryFinalization( + usePreviousIndex, + useIndexOptionsDefaults, + explicitIndexOptionsDoNotPrune + ); + + Boolean shouldQueryPrune = queryOverridesPruning ? (queryOverrideExplicitFalse == false) : null; + + TokenPruningConfig queryPruningConfig = queryOverridesPruning && queryOverrideExplicitFalse == false + ? 
new TokenPruningConfig() + : null; + + boolean resultShouldBePruned = true; + if (queryOverridesPruning && queryOverrideExplicitFalse) { + resultShouldBePruned = false; + } else if (queryOverridesPruning == false && (usePreviousIndex || explicitIndexOptionsDoNotPrune)) { + resultShouldBePruned = false; + } + + try { + performTypeQueryFinalizationTest(mapperService, indexOptions, shouldQueryPrune, queryPruningConfig, resultShouldBePruned); + } catch (AssertionError e) { + String message = "performTypeQueryFinalizationTest failed using parameters: " + + "usePreviousIndex: " + + usePreviousIndex + + ", useIndexOptionsDefaults: " + + useIndexOptionsDefaults + + ", explicitIndexOptionsDoNotPrune: " + + explicitIndexOptionsDoNotPrune + + ", queryOverridesPruning: " + + queryOverridesPruning + + ", queryOverrideExplicitFalse: " + + queryOverrideExplicitFalse; + throw new AssertionError(message, e); + } + + } + + private SparseVectorFieldMapper.IndexOptions getIndexOptionsQueryFinalization( + boolean usePreviousIndex, + boolean useIndexOptionsDefaults, + boolean explicitIndexOptionsDoNotPrune + ) { + if (usePreviousIndex) { + return null; + } + + if (useIndexOptionsDefaults && explicitIndexOptionsDoNotPrune == false) { + return null; + } + + return explicitIndexOptionsDoNotPrune + ? 
new SparseVectorFieldMapper.IndexOptions(false, null) + : new SparseVectorFieldMapper.IndexOptions(true, new TokenPruningConfig()); + } + + private MapperService getMapperServiceForRandomizedFinalizationTest( + IndexVersion indexVersion, + boolean useIndexOptionsDefaults, + boolean explicitIndexOptionsDoNotPrune + ) throws IOException { + if (useIndexOptionsDefaults) { + return createMapperService(indexVersion, fieldMapping(this::minimalMapping)); + } + + if (explicitIndexOptionsDoNotPrune) { + return createMapperService(indexVersion, fieldMapping(this::mappingWithIndexOptionsPruneFalse)); + } + + return createMapperService(indexVersion, fieldMapping(this::mappingWithIndexOptions)); + } + + private static List QUERY_VECTORS = List.of( + new WeightedToken("pugs", 0.5f), + new WeightedToken("cats", 0.4f), + new WeightedToken("is", 0.1f) + ); + /** * Handles float/double conversion when reading/writing with xcontent by converting all numbers to floats. */ From 854a78ec3377016765b28a7577df7b808fca2171 Mon Sep 17 00:00:00 2001 From: "Mark J. 
Hoy" Date: Mon, 9 Jun 2025 19:16:41 -0400 Subject: [PATCH 04/37] complete unit tests; add yaml tests --- .../vectors/SparseVectorFieldMapperTests.java | 226 ++++++++- .../SemanticTextHighlighterTests.java | 4 +- .../test/multi_cluster/50_sparse_vector.yml | 438 +++++++++++++++++ .../test/remote_cluster/50_sparse_vector.yml | 437 +++++++++++++++++ .../test/ml/sparse_vector_search.yml | 444 ++++++++++++++++++ 5 files changed, 1541 insertions(+), 8 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index 912963fba8bd1..5f747d78d9c0b 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -39,6 +39,7 @@ import org.elasticsearch.search.vectors.SparseVectorQueryWrapper; import org.elasticsearch.test.index.IndexVersionUtils; import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParseException; import org.hamcrest.Matchers; import org.junit.AssumptionViolatedException; @@ -340,7 +341,7 @@ protected IndexVersion boostNotAllowedIndexVersion() { return NEW_SPARSE_VECTOR_INDEX_VERSION; } - public void testSparseVectorUnsupportedIndex() throws Exception { + public void testSparseVectorUnsupportedIndex() { IndexVersion version = IndexVersionUtils.randomVersionBetween( random(), PREVIOUS_SPARSE_VECTOR_INDEX_VERSION, @@ -352,6 +353,175 @@ public void testSparseVectorUnsupportedIndex() throws Exception { assertThat(e.getMessage(), containsString(SparseVectorFieldMapper.ERROR_MESSAGE_8X)); } + public void testPruneMustBeBoolean() { + Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { + b.field("type", "sparse_vector"); + b.startObject("index_options"); + b.field("prune", 
"othervalue"); + b.endObject(); + }))); + assertThat(e.getMessage(), containsString("[index_options] failed to parse field [prune]")); + assertThat(e.getCause().getCause(), instanceOf(IllegalArgumentException.class)); + assertThat(e.getCause().getCause().getMessage(), containsString("Failed to parse value [othervalue] as only [true] or [false] are allowed.")); + } + + public void testPruningConfigurationIsMap() { + Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { + b.field("type", "sparse_vector"); + b.startObject("index_options"); + b.field("prune", true); + b.field("pruning_config", "this_is_not_a_map"); + b.endObject(); + }))); + assertThat(e.getMessage(), containsString("[index_options] pruning_config doesn't support values of type:")); + assertThat(e.getCause(), instanceOf(XContentParseException.class)); + assertThat( + e.getCause().getMessage(), + containsString("[index_options] pruning_config doesn't support values of type: VALUE_STRING") + ); + } + + public void testWithIndexOptionsPruningConfigPruneRequired() throws Exception { + + Exception eTestPruneIsFalse = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { + b.field("type", "sparse_vector"); + b.startObject("index_options"); + b.field("prune", false); + b.startObject("pruning_config"); + b.field("tokens_freq_ratio_threshold", 5.0); + b.field("tokens_weight_threshold", 0.4); + b.endObject(); + b.endObject(); + }))); + assertThat(eTestPruneIsFalse.getMessage(), containsString("[index_options] failed to parse field [pruning_config]")); + assertThat(eTestPruneIsFalse.getCause().getCause().getCause(), instanceOf(IllegalArgumentException.class)); + assertThat( + eTestPruneIsFalse.getCause().getCause().getCause().getMessage(), + containsString("[index_options] field [pruning_config] should only be set if [prune] is set to true") + ); + + Exception eTestPruneIsMissing = expectThrows(MapperParsingException.class, () -> 
createMapperService(fieldMapping(b -> { + b.field("type", "sparse_vector"); + b.startObject("index_options"); + b.startObject("pruning_config"); + b.field("tokens_freq_ratio_threshold", 5.0); + b.field("tokens_weight_threshold", 0.4); + b.endObject(); + b.endObject(); + }))); + assertThat( + eTestPruneIsMissing.getMessage(), + containsString("Failed to parse mapping: Failed to build [index_options] after last required field arrived") + ); + assertThat(eTestPruneIsMissing.getCause().getCause(), instanceOf(IllegalArgumentException.class)); + assertThat( + eTestPruneIsMissing.getCause().getCause().getMessage(), + containsString("[index_options] field [pruning_config] should only be set if [prune] is set to true") + ); + } + + public void testTokensFreqRatioCorrect() { + Exception eTestInteger = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { + b.field("type", "sparse_vector"); + b.startObject("index_options"); + b.field("prune", true); + b.startObject("pruning_config"); + b.field("tokens_freq_ratio_threshold", "notaninteger"); + b.endObject(); + b.endObject(); + }))); + assertThat( + eTestInteger.getMessage(), + containsString("Failed to parse mapping: [0:0] [index_options] failed to parse field [pruning_config]") + ); + assertThat(eTestInteger.getCause().getCause(), instanceOf(XContentParseException.class)); + assertThat( + eTestInteger.getCause().getCause().getMessage(), + containsString("[pruning_config] failed to parse field [tokens_freq_ratio_threshold]") + ); + assertThat(eTestInteger.getCause().getCause().getCause(), instanceOf(NumberFormatException.class)); + assertThat(eTestInteger.getCause().getCause().getCause().getMessage(), containsString("For input string: \"notaninteger\"")); + + Exception eTestRangeLower = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { + b.field("type", "sparse_vector"); + b.startObject("index_options"); + b.field("prune", true); + 
b.startObject("pruning_config"); + b.field("tokens_freq_ratio_threshold", -2); + b.endObject(); + b.endObject(); + }))); + assertThat(eTestRangeLower.getMessage(), containsString("[index_options] failed to parse field [pruning_config]")); + assertThat(eTestRangeLower.getCause().getCause().getCause(), instanceOf(IllegalArgumentException.class)); + assertThat( + eTestRangeLower.getCause().getCause().getCause().getMessage(), + containsString("[tokens_freq_ratio_threshold] must be between [1] and [100], got -2.0") + ); + + Exception eTestRangeHigher = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { + b.field("type", "sparse_vector"); + b.startObject("index_options"); + b.field("prune", true); + b.startObject("pruning_config"); + b.field("tokens_freq_ratio_threshold", 101); + b.endObject(); + b.endObject(); + }))); + assertThat(eTestRangeHigher.getMessage(), containsString("[index_options] failed to parse field [pruning_config]")); + assertThat(eTestRangeHigher.getCause().getCause().getCause(), instanceOf(IllegalArgumentException.class)); + assertThat( + eTestRangeHigher.getCause().getCause().getCause().getMessage(), + containsString("[tokens_freq_ratio_threshold] must be between [1] and [100], got 101.0") + ); + } + + public void testTokensWeightThresholdCorrect() { + Exception eTestDouble = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { + b.field("type", "sparse_vector"); + b.startObject("index_options"); + b.field("prune", true); + b.startObject("pruning_config"); + b.field("tokens_weight_threshold", "notadouble"); + b.endObject(); + b.endObject(); + }))); + assertThat(eTestDouble.getMessage(), containsString("[index_options] failed to parse field [pruning_config]")); + assertThat(eTestDouble.getCause().getCause().getCause(), instanceOf(NumberFormatException.class)); + assertThat(eTestDouble.getCause().getCause().getCause().getMessage(), containsString("For input string: 
\"notadouble\"")); + + Exception eTestRangeLower = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { + b.field("type", "sparse_vector"); + b.startObject("index_options"); + b.field("prune", true); + b.startObject("pruning_config"); + b.field("tokens_weight_threshold", -0.1); + b.endObject(); + b.endObject(); + }))); + assertThat(eTestRangeLower.getMessage(), containsString("[index_options] failed to parse field [pruning_config]")); + assertThat(eTestRangeLower.getCause().getCause().getCause(), instanceOf(IllegalArgumentException.class)); + assertThat( + eTestRangeLower.getCause().getCause().getCause().getMessage(), + containsString("[tokens_weight_threshold] must be between 0 and 1") + ); + + Exception eTestRangeHigher = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { + b.field("type", "sparse_vector"); + b.startObject("index_options"); + b.field("prune", true); + b.startObject("pruning_config"); + b.field("tokens_weight_threshold", 1.1); + b.endObject(); + b.endObject(); + }))); + assertThat(eTestRangeHigher.getMessage(), containsString("[index_options] failed to parse field [pruning_config]")); + assertThat(eTestRangeHigher.getCause().getCause().getCause(), instanceOf(IllegalArgumentException.class)); + assertThat( + eTestRangeHigher.getCause().getCause().getCause().getMessage(), + containsString("[tokens_weight_threshold] must be between 0 and 1") + ); + } + private void withSearchExecutionContext(MapperService mapperService, CheckedConsumer consumer) throws IOException { for (boolean store : new boolean[] { true, false }) { @@ -374,13 +544,20 @@ private void withSearchExecutionContext(MapperService mapperService, CheckedCons public void testTypeQueryFinalizationWithRandomOptions() throws Exception { for (int i = 0; i < 20; i++) { - runTestTypeQueryFinalization(randomBoolean(), randomBoolean(), randomBoolean(), randomBoolean(), randomBoolean()); + runTestTypeQueryFinalization( + 
randomBoolean(), // usePreviousIndex + randomBoolean(), // useIndexOptionsDefaults + randomBoolean(), // explicitIndexOptionsDoNotPrune + randomBoolean(), // queryOverridesPruning + randomBoolean() // queryOverrideExplicitFalse + ); } } public void testTypeQueryFinalizationDefaultsCurrentVersion() throws Exception { - IndexVersion version = IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT; + IndexVersion version = IndexVersion.current(); MapperService mapperService = createMapperService(version, fieldMapping(this::minimalMapping)); + // query should be pruned by default on newer index versions performTypeQueryFinalizationTest(mapperService, null, null, null, true); } @@ -392,13 +569,15 @@ public void testTypeQueryFinalizationDefaultsPreviousVersion() throws Exception IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT ); MapperService mapperService = createMapperService(version, fieldMapping(this::minimalMapping)); + // query should _not_ be pruned by default on older index versions performTypeQueryFinalizationTest(mapperService, null, null, null, false); } public void testTypeQueryFinalizationWithIndexExplicit() throws Exception { - IndexVersion version = IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT; + IndexVersion version = IndexVersion.current(); MapperService mapperService = createMapperService(version, fieldMapping(this::mappingWithIndexOptions)); + // query should be pruned via explicit index options performTypeQueryFinalizationTest( mapperService, @@ -410,10 +589,45 @@ public void testTypeQueryFinalizationWithIndexExplicit() throws Exception { } public void testTypeQueryFinalizationWithIndexExplicitDoNotPrune() throws Exception { - IndexVersion version = IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT; + IndexVersion version = IndexVersion.current(); MapperService mapperService = createMapperService(version, fieldMapping(this::mappingWithIndexOptionsPruneFalse)); + // query should be pruned via explicit index options - 
performTypeQueryFinalizationTest(mapperService, new SparseVectorFieldMapper.IndexOptions(false, null), null, null, false); + performTypeQueryFinalizationTest( + mapperService, + new SparseVectorFieldMapper.IndexOptions(false, null), + null, + null, + false + ); + } + + public void testTypeQueryFinalizationQueryOverridesPruning() throws Exception { + IndexVersion version = IndexVersion.current(); + MapperService mapperService = createMapperService(version, fieldMapping(this::mappingWithIndexOptionsPruneFalse)); + + // query should still be pruned due to query builder setting it + performTypeQueryFinalizationTest( + mapperService, + new SparseVectorFieldMapper.IndexOptions(false, null), + true, + new TokenPruningConfig(), + true + ); + } + + public void testTypeQueryFinalizationQueryOverridesPruningOff() throws Exception { + IndexVersion version = IndexVersion.current(); + MapperService mapperService = createMapperService(version, fieldMapping(this::mappingWithIndexOptionsPruneFalse)); + + // query should not pruned due to query builder setting it + performTypeQueryFinalizationTest( + mapperService, + new SparseVectorFieldMapper.IndexOptions(true, new TokenPruningConfig()), + false, + null, + false + ); } private void performTypeQueryFinalizationTest( diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighterTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighterTests.java index ef0429c9250d7..67c6d8da52a88 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighterTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighterTests.java @@ -140,7 +140,7 @@ public void testSparseVector() throws Exception { tokens, null, null, - null, + false, null ); NestedQueryBuilder nestedQueryBuilder = new 
NestedQueryBuilder(fieldType.getChunksField().fullPath(), sparseQuery, ScoreMode.Max); @@ -183,7 +183,7 @@ public void testNoSemanticField() throws Exception { tokens, null, null, - null, + false, null ); var query = new BoolQueryBuilder().should(sparseQuery).should(new MatchAllQueryBuilder()); diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml index 20ab78a48ae1b..c6d9371ebc01e 100644 --- a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml +++ b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml @@ -113,6 +113,20 @@ teardown: model_id: "text_expansion_model" ignore: 404 + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.delete: + index: ["sparse_vector_pruning_test", "test-sparse-vector-without-pruning", "test-sparse-vector-with-pruning"] + ignore: 404 + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.refresh: { } + --- "Test sparse_vector search": - do: @@ -184,3 +198,427 @@ teardown: - match: { hits.total.value: 5 } - match: { hits.hits.0._source.source_text: "the octopus comforter smells" } + + +--- +"Check sparse_vector token pruning index_options mappings": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + 
index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + index_options: + prune: true + pruning_config: + tokens_freq_ratio_threshold: 1.0 + tokens_weight_threshold: 0.4 + + - match: { acknowledged: true } + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.get_mapping: + index: sparse_vector_pruning_test + + - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.prune: true } + - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_freq_ratio_threshold: 1.0 } + - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_weight_threshold: 0.4 } + +--- +"Check sparse_vector token pruning index_options mappings defaults": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + + - match: { acknowledged: true } + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.get_mapping: + index: sparse_vector_pruning_test + + - not_exists: sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options + +--- +"Check sparse_vector token pruning index_options prune missing do not allow config": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 
8.19" + - skip: + features: headers + + - do: + catch: /\[index_options\] field \[pruning_config\] should only be set if \[prune\] is set to true/ + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + index_options: + pruning_config: + tokens_freq_ratio_threshold: 1.0 + tokens_weight_threshold: 0.4 + + - match: { status: 400 } + +--- +"Check sparse_vector token pruning index_options prune false do not allow config": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + catch: /\[index_options\] field \[pruning_config\] should only be set if \[prune\] is set to true/ + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + index_options: + prune: false + pruning_config: + tokens_freq_ratio_threshold: 1.0 + tokens_weight_threshold: 0.4 + + - match: { status: 400 } + +--- +"Check sparse_vector token pruning index_options tokens freq out of bounds": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + catch: /\[tokens_freq_ratio_threshold\] must be between \[1\] and \[100\]/ + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + 
index_options: + prune: true + pruning_config: + tokens_freq_ratio_threshold: 101.0 + tokens_weight_threshold: 0.4 + + - match: { status: 400 } + +--- +"Check sparse_vector token pruning index_options tokens weight out of bounds": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + catch: /\[tokens_weight_threshold\] must be between 0 and 1/ + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + index_options: + prune: true + pruning_config: + tokens_freq_ratio_threshold: 5.0 + tokens_weight_threshold: 3.5 + + - match: { status: 400 } + +--- +"Check sparse_vector token pruning index_options in query": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + index: test-sparse-vector-with-pruning + body: + mappings: + properties: + content_embedding: + type: sparse_vector + index_options: + prune: true + pruning_config: + tokens_freq_ratio_threshold: 1 + tokens_weight_threshold: 1.0 + + - match: { acknowledged: true } + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + index: test-sparse-vector-without-pruning + body: + mappings: + properties: + content_embedding: + type: sparse_vector + index_options: + prune: false + + - match: { acknowledged: true } + + - do: + headers: + Authorization: "Basic 
dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + bulk: + index: test-sparse-vector-with-pruning + refresh: true + body: | + {"index": { "_id": "1" }} + {"content_embedding":{"cheese": 2.671405,"is": 0.11809908,"comet": 0.26088917}} + {"index": { "_id": "2" }} + {"content_embedding":{"planet": 2.3438394,"is": 0.54600334,"astronomy": 0.36015007,"moon": 0.20022368}} + {"index": { "_id": "3" }} + {"content_embedding":{"is": 0.6891394,"globe": 0.484035,"ocean": 0.080102935,"underground": 0.053516876}} + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + bulk: + index: test-sparse-vector-without-pruning + refresh: true + body: | + {"index": { "_id": "1" }} + {"content_embedding":{"cheese": 2.671405,"is": 0.11809908,"comet": 0.26088917}} + {"index": { "_id": "2" }} + {"content_embedding":{"planet": 2.3438394,"is": 0.54600334,"astronomy": 0.36015007,"moon": 0.20022368}} + {"index": { "_id": "3" }} + {"content_embedding":{"is": 0.6891394,"globe": 0.484035,"ocean": 0.080102935,"underground": 0.053516876}} + - do: + search: + index: test-sparse-vector-without-pruning + body: + query: + sparse_vector: + field: content_embedding + query_vector: + cheese: 0.5 + comet: 0.5 + globe: 0.484035 + ocean: 0.080102935 + underground: 0.053516876 + is: 0.54600334 + + - match: { hits.total.value: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.2._id: "2" } + + - do: + search: + index: test-sparse-vector-with-pruning + body: + query: + sparse_vector: + field: content_embedding + query_vector: + cheese: 0.5 + comet: 0.5 + globe: 0.484035 + ocean: 0.080102935 + underground: 0.053516876 + is: 0.54600334 + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "3" } + + - do: + search: + index: test-sparse-vector-without-pruning + body: + query: + 
sparse_vector: + field: content_embedding + query_vector: + cheese: 0.5 + comet: 0.5 + globe: 0.484035 + ocean: 0.080102935 + underground: 0.053516876 + is: 0.54600334 + prune: true + pruning_config: + tokens_freq_ratio_threshold: 1 + tokens_weight_threshold: 1.0 + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "3" } + + - do: + search: + index: test-sparse-vector-with-pruning + body: + query: + sparse_vector: + field: content_embedding + query_vector: + cheese: 0.5 + comet: 0.5 + globe: 0.484035 + ocean: 0.080102935 + underground: 0.053516876 + is: 0.54600334 + prune: false + + - match: { hits.total.value: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.2._id: "2" } + +--- +"Check sparse_vector should prune by default": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + index: test-sparse-vector-pruning-default + body: + mappings: + properties: + content_embedding: + type: sparse_vector + + - match: { acknowledged: true } + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + bulk: + index: test-sparse-vector-pruning-default + refresh: true + body: | + {"index": { "_id": "1" }} + {"content_embedding":{"cheese": 2.671405,"is": 0.11809908,"comet": 0.26088917}} + {"index": { "_id": "2" }} + {"content_embedding":{"planet": 2.3438394,"is": 0.14600334,"astronomy": 0.36015007,"moon": 0.20022368}} + {"index": { "_id": "3" }} + {"content_embedding":{"is": 0.1891394,"globe": 0.484035,"ocean": 0.080102935,"underground": 0.053516876}} + {"index": { "_id": "4" }} + {"content_embedding":{"is": 
0.1891394}} + {"index": { "_id": "5" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "6" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "7" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "8" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "9" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "10" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "11" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "12" }} + {"content_embedding":{"is": 0.1891394}} + - do: + search: + index: test-sparse-vector-pruning-default + body: + query: + sparse_vector: + field: content_embedding + query_vector: + pugs: 0.5 + cats: 0.5 + is: 0.04600334 + + - match: { hits.total.value: 0 } + diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml index e3b59bdaf50bd..0c0464a412a2f 100644 --- a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml +++ b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml @@ -112,6 +112,20 @@ teardown: model_id: "text_expansion_model" ignore: 404 + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.delete: + index: ["sparse_vector_pruning_test", "test-sparse-vector-without-pruning", "test-sparse-vector-with-pruning"] + ignore: 404 + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.refresh: { } + --- "Test sparse_vector search": - do: @@ -183,3 +197,426 @@ teardown: - match: { 
hits.total.value: 5 } - match: { hits.hits.0._source.source_text: "the octopus comforter smells" } + +--- +"Check sparse_vector token pruning index_options mappings": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + index_options: + prune: true + pruning_config: + tokens_freq_ratio_threshold: 1.0 + tokens_weight_threshold: 0.4 + + - match: { acknowledged: true } + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.get_mapping: + index: sparse_vector_pruning_test + + - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.prune: true } + - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_freq_ratio_threshold: 1.0 } + - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_weight_threshold: 0.4 } + +--- +"Check sparse_vector token pruning index_options mappings defaults": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + + - match: { acknowledged: true } + + - do: + headers: + 
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.get_mapping: + index: sparse_vector_pruning_test + + - not_exists: sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options + +--- +"Check sparse_vector token pruning index_options prune missing do not allow config": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + catch: /\[index_options\] field \[pruning_config\] should only be set if \[prune\] is set to true/ + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + index_options: + pruning_config: + tokens_freq_ratio_threshold: 1.0 + tokens_weight_threshold: 0.4 + + - match: { status: 400 } + +--- +"Check sparse_vector token pruning index_options prune false do not allow config": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + catch: /\[index_options\] field \[pruning_config\] should only be set if \[prune\] is set to true/ + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + index_options: + prune: false + pruning_config: + tokens_freq_ratio_threshold: 1.0 + tokens_weight_threshold: 0.4 + + - match: { status: 400 } + +--- +"Check sparse_vector token pruning index_options tokens freq out of bounds": + + - requires: + cluster_features: 
'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + catch: /\[tokens_freq_ratio_threshold\] must be between \[1\] and \[100\]/ + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + index_options: + prune: true + pruning_config: + tokens_freq_ratio_threshold: 101.0 + tokens_weight_threshold: 0.4 + + - match: { status: 400 } + +--- +"Check sparse_vector token pruning index_options tokens weight out of bounds": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + catch: /\[tokens_weight_threshold\] must be between 0 and 1/ + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + index_options: + prune: true + pruning_config: + tokens_freq_ratio_threshold: 5.0 + tokens_weight_threshold: 3.5 + + - match: { status: 400 } + +--- +"Check sparse_vector token pruning index_options in query": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + index: test-sparse-vector-with-pruning + body: + mappings: + properties: + content_embedding: + type: sparse_vector + index_options: + prune: true + pruning_config: + 
tokens_freq_ratio_threshold: 1 + tokens_weight_threshold: 1.0 + + - match: { acknowledged: true } + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + index: test-sparse-vector-without-pruning + body: + mappings: + properties: + content_embedding: + type: sparse_vector + index_options: + prune: false + + - match: { acknowledged: true } + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + bulk: + index: test-sparse-vector-with-pruning + refresh: true + body: | + {"index": { "_id": "1" }} + {"content_embedding":{"cheese": 2.671405,"is": 0.11809908,"comet": 0.26088917}} + {"index": { "_id": "2" }} + {"content_embedding":{"planet": 2.3438394,"is": 0.54600334,"astronomy": 0.36015007,"moon": 0.20022368}} + {"index": { "_id": "3" }} + {"content_embedding":{"is": 0.6891394,"globe": 0.484035,"ocean": 0.080102935,"underground": 0.053516876}} + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + bulk: + index: test-sparse-vector-without-pruning + refresh: true + body: | + {"index": { "_id": "1" }} + {"content_embedding":{"cheese": 2.671405,"is": 0.11809908,"comet": 0.26088917}} + {"index": { "_id": "2" }} + {"content_embedding":{"planet": 2.3438394,"is": 0.54600334,"astronomy": 0.36015007,"moon": 0.20022368}} + {"index": { "_id": "3" }} + {"content_embedding":{"is": 0.6891394,"globe": 0.484035,"ocean": 0.080102935,"underground": 0.053516876}} + - do: + search: + index: test-sparse-vector-without-pruning + body: + query: + sparse_vector: + field: content_embedding + query_vector: + cheese: 0.5 + comet: 0.5 + globe: 0.484035 + ocean: 0.080102935 + underground: 0.053516876 + is: 0.54600334 + + - match: { hits.total.value: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: 
"3" } + - match: { hits.hits.2._id: "2" } + + - do: + search: + index: test-sparse-vector-with-pruning + body: + query: + sparse_vector: + field: content_embedding + query_vector: + cheese: 0.5 + comet: 0.5 + globe: 0.484035 + ocean: 0.080102935 + underground: 0.053516876 + is: 0.54600334 + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "3" } + + - do: + search: + index: test-sparse-vector-without-pruning + body: + query: + sparse_vector: + field: content_embedding + query_vector: + cheese: 0.5 + comet: 0.5 + globe: 0.484035 + ocean: 0.080102935 + underground: 0.053516876 + is: 0.54600334 + prune: true + pruning_config: + tokens_freq_ratio_threshold: 1 + tokens_weight_threshold: 1.0 + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "3" } + + - do: + search: + index: test-sparse-vector-with-pruning + body: + query: + sparse_vector: + field: content_embedding + query_vector: + cheese: 0.5 + comet: 0.5 + globe: 0.484035 + ocean: 0.080102935 + underground: 0.053516876 + is: 0.54600334 + prune: false + + - match: { hits.total.value: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.2._id: "2" } + +--- +"Check sparse_vector should prune by default": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + indices.create: + index: test-sparse-vector-pruning-default + body: + mappings: + properties: + content_embedding: + type: sparse_vector + + - match: { acknowledged: true } + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + Content-Type: application/json + bulk: + index: 
test-sparse-vector-pruning-default + refresh: true + body: | + {"index": { "_id": "1" }} + {"content_embedding":{"cheese": 2.671405,"is": 0.11809908,"comet": 0.26088917}} + {"index": { "_id": "2" }} + {"content_embedding":{"planet": 2.3438394,"is": 0.14600334,"astronomy": 0.36015007,"moon": 0.20022368}} + {"index": { "_id": "3" }} + {"content_embedding":{"is": 0.1891394,"globe": 0.484035,"ocean": 0.080102935,"underground": 0.053516876}} + {"index": { "_id": "4" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "5" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "6" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "7" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "8" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "9" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "10" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "11" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "12" }} + {"content_embedding":{"is": 0.1891394}} + - do: + search: + index: test-sparse-vector-pruning-default + body: + query: + sparse_vector: + field: content_embedding + query_vector: + pugs: 0.5 + cats: 0.5 + is: 0.04600334 + + - match: { hits.total.value: 0 } + diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml index 3481773b0bab3..07bbd42d5d14e 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml @@ -89,6 +89,24 @@ setup: model_id: text_expansion_model wait_for: started +--- +teardown: + - skip: + features: headers + + - do: + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. 
the test setup superuser + Content-Type: application/json + indices.delete: + index: ["sparse_vector_pruning_test", "test-sparse-vector-without-pruning", "test-sparse-vector-with-pruning"] + ignore: 404 + - do: + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser + Content-Type: application/json + indices.refresh: { } + --- "Test sparse_vector search": - do: @@ -510,3 +528,429 @@ setup: - match: { hits.hits.0._score: 4.0 } - match: { hits.hits.1._id: "parent-foo-bar" } - match: { hits.hits.1._score: 2.0 } + +--- +"Check sparse_vector token pruning index_options mappings": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser + Content-Type: application/json + indices.create: + index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + index_options: + prune: true + pruning_config: + tokens_freq_ratio_threshold: 1.0 + tokens_weight_threshold: 0.4 + + - match: { acknowledged: true } + + - do: + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. 
the test setup superuser + Content-Type: application/json + indices.get_mapping: + index: sparse_vector_pruning_test + + - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.prune: true } + - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_freq_ratio_threshold: 1.0 } + - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_weight_threshold: 0.4 } + +--- +"Check sparse_vector token pruning index_options mappings defaults": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser + Content-Type: application/json + indices.create: + index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + + - match: { acknowledged: true } + + - do: + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser + Content-Type: application/json + indices.get_mapping: + index: sparse_vector_pruning_test + + - not_exists: sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options + +--- +"Check sparse_vector token pruning index_options prune missing do not allow config": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + catch: /\[index_options\] field \[pruning_config\] should only be set if \[prune\] is set to true/ + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. 
the test setup superuser + Content-Type: application/json + indices.create: + index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + index_options: + pruning_config: + tokens_freq_ratio_threshold: 1.0 + tokens_weight_threshold: 0.4 + + - match: { status: 400 } + +--- +"Check sparse_vector token pruning index_options prune false do not allow config": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + catch: /\[index_options\] field \[pruning_config\] should only be set if \[prune\] is set to true/ + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser + Content-Type: application/json + indices.create: + index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + index_options: + prune: false + pruning_config: + tokens_freq_ratio_threshold: 1.0 + tokens_weight_threshold: 0.4 + + - match: { status: 400 } + +--- +"Check sparse_vector token pruning index_options tokens freq out of bounds": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + catch: /\[tokens_freq_ratio_threshold\] must be between \[1\] and \[100\]/ + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. 
the test setup superuser + Content-Type: application/json + indices.create: + index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + index_options: + prune: true + pruning_config: + tokens_freq_ratio_threshold: 101.0 + tokens_weight_threshold: 0.4 + + - match: { status: 400 } + +--- +"Check sparse_vector token pruning index_options tokens weight out of bounds": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + catch: /\[tokens_weight_threshold\] must be between 0 and 1/ + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser + Content-Type: application/json + indices.create: + index: sparse_vector_pruning_test + body: + mappings: + properties: + text: + type: text + ml.tokens: + type: sparse_vector + index_options: + prune: true + pruning_config: + tokens_freq_ratio_threshold: 5.0 + tokens_weight_threshold: 3.5 + + - match: { status: 400 } + +--- +"Check sparse_vector token pruning index_options in query": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. 
the test setup superuser + Content-Type: application/json + indices.create: + index: test-sparse-vector-with-pruning + body: + mappings: + properties: + content_embedding: + type: sparse_vector + index_options: + prune: true + pruning_config: + tokens_freq_ratio_threshold: 1 + tokens_weight_threshold: 1.0 + settings: + number_of_shards: 1 + + - match: { acknowledged: true } + + - do: + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser + Content-Type: application/json + indices.create: + index: test-sparse-vector-without-pruning + body: + mappings: + properties: + content_embedding: + type: sparse_vector + index_options: + prune: false + settings: + number_of_shards: 1 + + - match: { acknowledged: true } + + - do: + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser + Content-Type: application/json + bulk: + index: test-sparse-vector-with-pruning + refresh: true + body: | + {"index": { "_id": "1" }} + {"content_embedding":{"cheese": 2.671405,"is": 0.11809908,"comet": 0.26088917}} + {"index": { "_id": "2" }} + {"content_embedding":{"planet": 2.3438394,"is": 0.54600334,"astronomy": 0.36015007,"moon": 0.20022368}} + {"index": { "_id": "3" }} + {"content_embedding":{"is": 0.6891394,"globe": 0.484035,"ocean": 0.080102935,"underground": 0.053516876}} + - do: + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. 
the test setup superuser + Content-Type: application/json + bulk: + index: test-sparse-vector-without-pruning + refresh: true + body: | + {"index": { "_id": "1" }} + {"content_embedding":{"cheese": 2.671405,"is": 0.11809908,"comet": 0.26088917}} + {"index": { "_id": "2" }} + {"content_embedding":{"planet": 2.3438394,"is": 0.54600334,"astronomy": 0.36015007,"moon": 0.20022368}} + {"index": { "_id": "3" }} + {"content_embedding":{"is": 0.6891394,"globe": 0.484035,"ocean": 0.080102935,"underground": 0.053516876}} + - do: + search: + index: test-sparse-vector-without-pruning + body: + query: + sparse_vector: + field: content_embedding + query_vector: + cheese: 0.5 + comet: 0.5 + globe: 0.484035 + ocean: 0.080102935 + underground: 0.053516876 + is: 0.54600334 + + - match: { hits.total.value: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.2._id: "2" } + + - do: + search: + index: test-sparse-vector-with-pruning + body: + query: + sparse_vector: + field: content_embedding + query_vector: + cheese: 0.5 + comet: 0.5 + globe: 0.484035 + ocean: 0.080102935 + underground: 0.053516876 + is: 0.54600334 + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "3" } + + - do: + search: + index: test-sparse-vector-without-pruning + body: + query: + sparse_vector: + field: content_embedding + query_vector: + cheese: 0.5 + comet: 0.5 + globe: 0.484035 + ocean: 0.080102935 + underground: 0.053516876 + is: 0.54600334 + prune: true + pruning_config: + tokens_freq_ratio_threshold: 1 + tokens_weight_threshold: 1.0 + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "3" } + + - do: + search: + index: test-sparse-vector-with-pruning + body: + query: + sparse_vector: + field: content_embedding + query_vector: + cheese: 0.5 + comet: 0.5 + globe: 0.484035 + ocean: 0.080102935 + underground: 0.053516876 + is: 0.54600334 + prune: false + + - match: 
{ hits.total.value: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.2._id: "2" } + +--- +"Check sparse_vector should prune by default": + + - requires: + cluster_features: 'sparse_vector.index_options_supported' + reason: "sparse_vector token pruning index options added support in 8.19" + - skip: + features: headers + + - do: + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser + Content-Type: application/json + indices.create: + index: test-sparse-vector-pruning-default + body: + mappings: + properties: + content_embedding: + type: sparse_vector + + - match: { acknowledged: true } + + - do: + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser + Content-Type: application/json + bulk: + index: test-sparse-vector-pruning-default + refresh: true + body: | + {"index": { "_id": "1" }} + {"content_embedding":{"cheese": 2.671405,"is": 0.11809908,"comet": 0.26088917}} + {"index": { "_id": "2" }} + {"content_embedding":{"planet": 2.3438394,"is": 0.14600334,"astronomy": 0.36015007,"moon": 0.20022368}} + {"index": { "_id": "3" }} + {"content_embedding":{"is": 0.1891394,"globe": 0.484035,"ocean": 0.080102935,"underground": 0.053516876}} + {"index": { "_id": "4" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "5" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "6" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "7" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "8" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "9" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "10" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "11" }} + {"content_embedding":{"is": 0.1891394}} + {"index": { "_id": "12" }} + 
{"content_embedding":{"is": 0.1891394}} + - do: + search: + index: test-sparse-vector-pruning-default + body: + query: + sparse_vector: + field: content_embedding + query_vector: + pugs: 0.5 + cats: 0.5 + is: 0.04600334 + + - match: { hits.total.value: 0 } From 110d04e3866deac9edcf4ca1cf9633847ec749fc Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 9 Jun 2025 23:28:32 +0000 Subject: [PATCH 05/37] [CI] Auto commit changes from spotless --- .../java/org/elasticsearch/TransportVersions.java | 2 +- .../vectors/SparseVectorFieldMapperTests.java | 13 +++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 2d101faf3d383..c6ae6e4279756 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -292,7 +292,7 @@ static TransportVersion def(int id) { public static final TransportVersion SEARCH_SOURCE_EXCLUDE_VECTORS_PARAM = def(9_092_0_00); public static final TransportVersion SNAPSHOT_INDEX_SHARD_STATUS_MISSING_STATS = def(9_093_0_00); public static final TransportVersion SPARSE_VECTOR_FIELD_PRUNING_OPTIONS = def(9_094_0_00); - + /* * STOP! READ THIS FIRST! 
No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index 5f747d78d9c0b..c4d3dc57c92b0 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -362,7 +362,10 @@ public void testPruneMustBeBoolean() { }))); assertThat(e.getMessage(), containsString("[index_options] failed to parse field [prune]")); assertThat(e.getCause().getCause(), instanceOf(IllegalArgumentException.class)); - assertThat(e.getCause().getCause().getMessage(), containsString("Failed to parse value [othervalue] as only [true] or [false] are allowed.")); + assertThat( + e.getCause().getCause().getMessage(), + containsString("Failed to parse value [othervalue] as only [true] or [false] are allowed.") + ); } public void testPruningConfigurationIsMap() { @@ -593,13 +596,7 @@ public void testTypeQueryFinalizationWithIndexExplicitDoNotPrune() throws Except MapperService mapperService = createMapperService(version, fieldMapping(this::mappingWithIndexOptionsPruneFalse)); // query should be pruned via explicit index options - performTypeQueryFinalizationTest( - mapperService, - new SparseVectorFieldMapper.IndexOptions(false, null), - null, - null, - false - ); + performTypeQueryFinalizationTest(mapperService, new SparseVectorFieldMapper.IndexOptions(false, null), null, null, false); } public void testTypeQueryFinalizationQueryOverridesPruning() throws Exception { From 46bd54de2c1807c70c58c4cf76fdd8163ecc7ab5 Mon Sep 17 00:00:00 2001 From: "Mark J. 
Hoy" Date: Tue, 10 Jun 2025 09:18:36 -0400 Subject: [PATCH 06/37] register test feature for sparse vector --- .../java/org/elasticsearch/xpack/core/XPackFeatures.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackFeatures.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackFeatures.java index 42824a553d2bd..ad0f38d975b86 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackFeatures.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackFeatures.java @@ -12,6 +12,8 @@ import java.util.Set; +import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.SPARSE_VECTOR_INDEX_OPTIONS_FEATURE; + /** * Provides the XPack features that this version of the code supports */ @@ -20,4 +22,9 @@ public class XPackFeatures implements FeatureSpecification { public Set getFeatures() { return Set.of(); } + + @Override + public Set getTestFeatures() { + return Set.of(SPARSE_VECTOR_INDEX_OPTIONS_FEATURE); + } } From e5d5a983a8b3e8f683573ec39e85bc7cbc19a286 Mon Sep 17 00:00:00 2001 From: "Mark J. Hoy" Date: Tue, 10 Jun 2025 10:34:13 -0400 Subject: [PATCH 07/37] Update docs/changelog/129089.yaml --- docs/changelog/129089.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 docs/changelog/129089.yaml diff --git a/docs/changelog/129089.yaml b/docs/changelog/129089.yaml new file mode 100644 index 0000000000000..8819e0474556d --- /dev/null +++ b/docs/changelog/129089.yaml @@ -0,0 +1,6 @@ +pr: 129089 +summary: Update `sparse_vector` field mapping to include default setting for token + pruning +area: "Mapping, Relevance" +type: enhancement +issues: [] From f1a07c8c7960dc357184d5f61862292adeca35ca Mon Sep 17 00:00:00 2001 From: "Mark J. 
Hoy" Date: Tue, 10 Jun 2025 10:37:22 -0400 Subject: [PATCH 08/37] update changelog --- docs/changelog/129089.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/changelog/129089.yaml b/docs/changelog/129089.yaml index 8819e0474556d..cd7c553026eea 100644 --- a/docs/changelog/129089.yaml +++ b/docs/changelog/129089.yaml @@ -1,6 +1,5 @@ pr: 129089 -summary: Update `sparse_vector` field mapping to include default setting for token - pruning -area: "Mapping, Relevance" +summary: Update `sparse_vector` field mapping to include default setting for token pruning +area: Mapping type: enhancement issues: [] From 2fa6a88d3848200591bdfaac6328d20ffc99a4bd Mon Sep 17 00:00:00 2001 From: "Mark J. Hoy" Date: Tue, 10 Jun 2025 10:49:10 -0400 Subject: [PATCH 09/37] add docs --- .../mapping-reference/sparse-vector.md | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/docs/reference/elasticsearch/mapping-reference/sparse-vector.md b/docs/reference/elasticsearch/mapping-reference/sparse-vector.md index f8bfe69310600..c4d380be826ea 100644 --- a/docs/reference/elasticsearch/mapping-reference/sparse-vector.md +++ b/docs/reference/elasticsearch/mapping-reference/sparse-vector.md @@ -24,6 +24,28 @@ PUT my-index } ``` +With any new indices created, token pruning will be turned on by default with appropriate defaults. You can control this behaviour using the optional `index_options` parameters for the field: + +```console +PUT my-index +{ + "mappings": { + "properties": { + "text.tokens": { + "type": "sparse_vector", + "index_options": { + "prune": true, + "pruning_config": { + "tokens_freq_ratio_threshold": 5, + "tokens_weight_threshold": 0.4 + } + } + } + } + } +} +``` + See [semantic search with ELSER](docs-content://solutions/search/semantic-search/semantic-search-elser-ingest-pipelines.md) for a complete example on adding documents to a `sparse_vector` mapped field using ELSER. 
## Parameters for `sparse_vector` fields [sparse-vectors-params] @@ -36,7 +58,28 @@ The following parameters are accepted by `sparse_vector` fields: * Exclude the field from [_source](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#source-filtering). * Use [synthetic `_source`](/reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source). +index_options +: (Optional, object) You can set index options for your `sparse_vector` field to determine if you should prune tokens, and the parameter configurations for the token pruning. If pruning options are not set in your `sparse_vector` query, Elasticsearch will use the default options configured for the field, if any. The available options for the index options are: + +Parameters for `index_options` are: + +`prune` {applies_to}`stack: preview 9.1` +: (Optional, boolean) Whether to perform pruning, omitting the non-significant tokens from the query to improve query performance. If `prune` is true but the `pruning_config` is not specified, pruning will occur but default values will be used. Default: true. + +`pruning_config` {applies_to}`stack: preview 9.1` +: (Optional, object) Optional pruning configuration. If enabled, this will omit non-significant tokens from the query in order to improve query performance. This is only used if `prune` is set to `true`. If `prune` is set to `true` but `pruning_config` is not specified, default values will be used. If `prune` is set to false but `pruning_config` is specified, an exception will occur. + + Parameters for `pruning_config` include: + + `tokens_freq_ratio_threshold` {applies_to}`stack: preview 9.1` + : (Optional, integer) Tokens whose frequency is more than `tokens_freq_ratio_threshold` times the average frequency of all tokens in the specified field are considered outliers and pruned. This value must be between 1 and 100. Default: `5`.
+ + `tokens_weight_threshold` {applies_to}`stack: preview 9.1` + : (Optional, float) Tokens whose weight is less than `tokens_weight_threshold` are considered insignificant and pruned. This value must be between 0 and 1. Default: `0.4`. + ::::{note} + The default values for `tokens_freq_ratio_threshold` and `tokens_weight_threshold` were chosen based on tests using ELSERv2 that provided the most optimal results. + :::: ## Multi-value sparse vectors [index-multi-value-sparse-vectors] From f98c894dc102099957fdb31c7947516c6a7cd80e Mon Sep 17 00:00:00 2001 From: "Mark J. Hoy" Date: Wed, 11 Jun 2025 15:41:05 -0400 Subject: [PATCH 10/37] explicit set default index_options if null --- .../vectors/SparseVectorFieldMapper.java | 51 ++++++++++++------- .../vectors/SparseVectorFieldMapperTests.java | 25 +++++++-- 2 files changed, 55 insertions(+), 21 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index abc552b91d52f..13a987da5a18e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -105,8 +105,16 @@ public static class Builder extends FieldMapper.Builder { Objects::toString ).acceptsNull(); + private final MappingParserContext mappingParserContext; + public Builder(String name) { super(name); + mappingParserContext = null; + } + + public Builder(String name, @Nullable MappingParserContext mappingParserContext) { + super(name); + this.mappingParserContext = mappingParserContext; } public Builder setStored(boolean value) { @@ -121,9 +129,17 @@ protected Parameter[] getParameters() { @Override public SparseVectorFieldMapper build(MapperBuilderContext context) { + IndexOptions builderIndexOptions = indexOptions.getValue(); + if (builderIndexOptions == null && + 
mappingParserContext != null && + shouldHaveDefaultPruningConfig(mappingParserContext.indexVersionCreated()) + ) { + builderIndexOptions = new IndexOptions(true, new TokenPruningConfig()); + } + return new SparseVectorFieldMapper( leafName(), - new SparseVectorFieldType(context.buildFullName(leafName()), stored.getValue(), meta.getValue(), indexOptions.getValue()), + new SparseVectorFieldType(context.buildFullName(leafName()), stored.getValue(), meta.getValue(), builderIndexOptions), builderParams(this, context) ); } @@ -171,7 +187,7 @@ private static SparseVectorFieldMapper.IndexOptions parseIndexOptions(MappingPar throw new IllegalArgumentException(ERROR_MESSAGE_8X); } - return new Builder(n); + return new Builder(n, c); }, notInMultiFields(CONTENT_TYPE)); public static final class SparseVectorFieldType extends MappedFieldType { @@ -270,20 +286,7 @@ public Query finalizeSparseVectorQuery( private TokenPruningConfig getDefaultPruningConfig(SearchExecutionContext context) { IndexVersion indexVersion = context.indexVersionCreated(); - - if (indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION)) { - // default pruning for 9.1.0+ is true for this index - return new TokenPruningConfig(); - } - - if (indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0)) { - // default pruning for 8.19.0+ is true for this index - return new TokenPruningConfig(); - } - - // the index version is before we added index_options support - // so pruning is off by default - return null; + return (shouldHaveDefaultPruningConfig(indexVersion)) ? 
new TokenPruningConfig() : null; } private static String indexedValueForSearch(Object value) { @@ -313,7 +316,7 @@ public Map indexAnalyzers() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName()).init(this); + return new Builder(leafName(), null).init(this); } @Override @@ -391,6 +394,20 @@ protected String contentType() { return CONTENT_TYPE; } + private static boolean shouldHaveDefaultPruningConfig(IndexVersion indexVersion) { + if (indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION)) { + // default pruning for 9.1.0+ is true for this index + return true; + } + + // default pruning for 8.19.0+ is true for this index + if (indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0)) { + return true; + } + + return false; + } + private static class SparseVectorValueFetcher implements ValueFetcher { private final String fieldName; private TermVectors termVectors; diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index c4d3dc57c92b0..a8b5ca069510a 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -29,10 +29,12 @@ import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentParsingException; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperTestCase; import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.mapper.SourceToParse; import 
org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.inference.WeightedToken; import org.elasticsearch.search.lookup.Source; @@ -51,8 +53,10 @@ import java.util.List; import java.util.Map; +import static org.elasticsearch.index.IndexVersions.UPGRADE_TO_LUCENE_10_0_0; import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.NEW_SPARSE_VECTOR_INDEX_VERSION; import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.PREVIOUS_SPARSE_VECTOR_INDEX_VERSION; +import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -78,7 +82,7 @@ protected void minimalMapping(XContentBuilder b) throws IOException { b.field("type", "sparse_vector"); } - protected void mappingWithIndexOptions(XContentBuilder b) throws IOException { + protected void minimalMappingWithExplicitIndexOptions(XContentBuilder b) throws IOException { b.field("type", "sparse_vector"); b.startObject("index_options"); { @@ -131,7 +135,7 @@ private static int getFrequency(TokenStream tk) throws IOException { public void testDefaults() throws Exception { DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); - assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString()); + assertEquals(Strings.toString(fieldMapping(this::minimalMappingWithExplicitIndexOptions)), mapper.mappingSource().toString()); ParsedDocument doc1 = mapper.parse(source(this::writeField)); @@ -155,6 +159,19 @@ public void testDefaults() throws Exception { assertTrue(freq1 < freq2); } + public void testMappingWithoutIndexOptionsUsesDefaults() throws Exception { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + 
assertEquals(Strings.toString(fieldMapping(this::minimalMappingWithExplicitIndexOptions)), mapper.mappingSource().toString()); + + IndexVersion preIndexOptionsVersion = IndexVersionUtils.randomVersionBetween( + random(), + UPGRADE_TO_LUCENE_10_0_0, + SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION + ); + DocumentMapper previousMapper = createDocumentMapper(preIndexOptionsVersion, fieldMapping(this::minimalMapping)); + assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), previousMapper.mappingSource().toString()); + } + public void testDotInFieldName() throws Exception { DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); ParsedDocument parsedDocument = mapper.parse(source(b -> b.field("field", Map.of("foo.bar", 10, "foobar", 20)))); @@ -579,7 +596,7 @@ public void testTypeQueryFinalizationDefaultsPreviousVersion() throws Exception public void testTypeQueryFinalizationWithIndexExplicit() throws Exception { IndexVersion version = IndexVersion.current(); - MapperService mapperService = createMapperService(version, fieldMapping(this::mappingWithIndexOptions)); + MapperService mapperService = createMapperService(version, fieldMapping(this::minimalMapping)); // query should be pruned via explicit index options performTypeQueryFinalizationTest( @@ -758,7 +775,7 @@ private MapperService getMapperServiceForRandomizedFinalizationTest( return createMapperService(indexVersion, fieldMapping(this::mappingWithIndexOptionsPruneFalse)); } - return createMapperService(indexVersion, fieldMapping(this::mappingWithIndexOptions)); + return createMapperService(indexVersion, fieldMapping(this::minimalMapping)); } private static List QUERY_VECTORS = List.of( From 68949c07042f0df584728d054d926cbb5d7428a7 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 11 Jun 2025 19:50:08 +0000 Subject: [PATCH 11/37] [CI] Auto commit changes from spotless --- .../index/mapper/vectors/SparseVectorFieldMapper.java | 7 +++---- 
.../index/mapper/vectors/SparseVectorFieldMapperTests.java | 2 -- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index 13a987da5a18e..cf9918a4af6e6 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -130,10 +130,9 @@ protected Parameter[] getParameters() { @Override public SparseVectorFieldMapper build(MapperBuilderContext context) { IndexOptions builderIndexOptions = indexOptions.getValue(); - if (builderIndexOptions == null && - mappingParserContext != null && - shouldHaveDefaultPruningConfig(mappingParserContext.indexVersionCreated()) - ) { + if (builderIndexOptions == null + && mappingParserContext != null + && shouldHaveDefaultPruningConfig(mappingParserContext.indexVersionCreated())) { builderIndexOptions = new IndexOptions(true, new TokenPruningConfig()); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index a8b5ca069510a..abc43a686b59b 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -29,12 +29,10 @@ import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentParsingException; import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperTestCase; import 
org.elasticsearch.index.mapper.ParsedDocument; -import org.elasticsearch.index.mapper.SourceToParse; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.inference.WeightedToken; import org.elasticsearch.search.lookup.Source; From aeedc14fc8ff198932e6f53fc3cfa52cb751ef15 Mon Sep 17 00:00:00 2001 From: "Mark J. Hoy" Date: Wed, 11 Jun 2025 22:18:23 -0400 Subject: [PATCH 12/37] update yaml tests; update docs --- .../mapping-reference/sparse-vector.md | 9 +++++++ .../test/multi_cluster/50_sparse_vector.yml | 23 +++++++++++++++--- .../test/remote_cluster/50_sparse_vector.yml | 24 ++++++++++++++++--- .../test/ml/sparse_vector_search.yml | 24 ++++++++++++++++--- 4 files changed, 71 insertions(+), 9 deletions(-) diff --git a/docs/reference/elasticsearch/mapping-reference/sparse-vector.md b/docs/reference/elasticsearch/mapping-reference/sparse-vector.md index c4d380be826ea..54f939773c07a 100644 --- a/docs/reference/elasticsearch/mapping-reference/sparse-vector.md +++ b/docs/reference/elasticsearch/mapping-reference/sparse-vector.md @@ -81,6 +81,15 @@ Parameters for `index_options` are: The default values for `tokens_freq_ratio_threshold` and `tokens_weight_threshold` were chosen based on tests using ELSERv2 that provided the most optimal results. :::: +When token pruning is applied, non-significant tokens will be pruned from the query. +Non-significant tokens can be defined as tokens that meet both of the following criteria: +* The token appears much more frequently than most tokens, indicating that it is a very common word and may not benefit the overall search results much. +* The weight/score is so low that the token is likely not very relevant to the original term + +Both the token frequency threshold and weight threshold must show the token is non-significant in order for the token to be pruned. 
+This ensures the tokens that are kept are frequent enough and have very high scoring or very infrequent tokens that may not have as high of a score. + + ## Multi-value sparse vectors [index-multi-value-sparse-vectors] When passing in arrays of values for sparse vectors the max value for similarly named features is selected. diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml index c6d9371ebc01e..19187eb911123 100644 --- a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml +++ b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml @@ -605,9 +605,10 @@ teardown: {"index": { "_id": "10" }} {"content_embedding":{"is": 0.1891394}} {"index": { "_id": "11" }} - {"content_embedding":{"is": 0.1891394}} + {"content_embedding":{"is": 0.6, "pugs": 0.6 }} {"index": { "_id": "12" }} - {"content_embedding":{"is": 0.1891394}} + {"content_embedding":{"is": 0.1891394, "pugs": 0.1 }} + - do: search: index: test-sparse-vector-pruning-default @@ -620,5 +621,21 @@ teardown: cats: 0.5 is: 0.04600334 - - match: { hits.total.value: 0 } + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "11" } + - match: { hits.hits.1._id: "12" } + + - do: + search: + index: test-sparse-vector-pruning-default + body: + query: + sparse_vector: + field: content_embedding + query_vector: + pugs: 0.5 + cats: 0.5 + is: 0.04600334 + prune: false + - match: { hits.total.value: 12 } diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml 
b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml index 0c0464a412a2f..9245ead69c0a5 100644 --- a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml +++ b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml @@ -603,9 +603,26 @@ teardown: {"index": { "_id": "10" }} {"content_embedding":{"is": 0.1891394}} {"index": { "_id": "11" }} - {"content_embedding":{"is": 0.1891394}} + {"content_embedding":{"is": 0.6, "pugs": 0.6 }} {"index": { "_id": "12" }} - {"content_embedding":{"is": 0.1891394}} + {"content_embedding":{"is": 0.1891394, "pugs": 0.1 }} + + - do: + search: + index: test-sparse-vector-pruning-default + body: + query: + sparse_vector: + field: content_embedding + query_vector: + pugs: 0.5 + cats: 0.5 + is: 0.04600334 + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "11" } + - match: { hits.hits.1._id: "12" } + - do: search: index: test-sparse-vector-pruning-default @@ -617,6 +634,7 @@ teardown: pugs: 0.5 cats: 0.5 is: 0.04600334 + prune: false - - match: { hits.total.value: 0 } + - match: { hits.total.value: 12 } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml index 07bbd42d5d14e..92a354c9948cc 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml @@ -938,9 +938,10 @@ teardown: {"index": { "_id": "10" }} {"content_embedding":{"is": 0.1891394}} {"index": { "_id": "11" }} - {"content_embedding":{"is": 0.1891394}} + {"content_embedding":{"is": 0.6, "pugs": 0.6 }} {"index": { "_id": "12" }} - {"content_embedding":{"is": 0.1891394}} + 
{"content_embedding":{"is": 0.1891394, "pugs": 0.1 }} + - do: search: index: test-sparse-vector-pruning-default @@ -953,4 +954,21 @@ teardown: cats: 0.5 is: 0.04600334 - - match: { hits.total.value: 0 } + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "11" } + - match: { hits.hits.1._id: "12" } + + - do: + search: + index: test-sparse-vector-pruning-default + body: + query: + sparse_vector: + field: content_embedding + query_vector: + pugs: 0.5 + cats: 0.5 + is: 0.04600334 + prune: false + + - match: { hits.total.value: 12 } From 6e6c6676b40f985a19d359d729b59a904fed0a63 Mon Sep 17 00:00:00 2001 From: "Mark J. Hoy" Date: Thu, 12 Jun 2025 08:35:57 -0400 Subject: [PATCH 13/37] fix yaml tests --- .../test/multi_cluster/50_sparse_vector.yml | 5 ++++- .../test/remote_cluster/50_sparse_vector.yml | 5 ++++- .../test/ml/sparse_vector_search.yml | 21 +++++-------------- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml index 19187eb911123..92edc8c3bb5b7 100644 --- a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml +++ b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml @@ -273,7 +273,10 @@ teardown: indices.get_mapping: index: sparse_vector_pruning_test - - not_exists: sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options + # the index_options with pruning defaults will be serialized here explicitly + - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.prune: true } + - match: { 
sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_freq_ratio_threshold: 5.0 } + - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_weight_threshold: 0.4 } --- "Check sparse_vector token pruning index_options prune missing do not allow config": diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml index 9245ead69c0a5..84c1112a66cae 100644 --- a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml +++ b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml @@ -271,7 +271,10 @@ teardown: indices.get_mapping: index: sparse_vector_pruning_test - - not_exists: sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options + # the index_options with pruning defaults will be serialized here explicitly + - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.prune: true } + - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_freq_ratio_threshold: 5.0 } + - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_weight_threshold: 0.4 } --- "Check sparse_vector token pruning index_options prune missing do not allow config": diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml index 92a354c9948cc..9a66a43367ce5 100644 --- 
a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml @@ -96,14 +96,12 @@ teardown: - do: headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser Content-Type: application/json indices.delete: index: ["sparse_vector_pruning_test", "test-sparse-vector-without-pruning", "test-sparse-vector-with-pruning"] ignore: 404 - do: headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser Content-Type: application/json indices.refresh: { } @@ -540,7 +538,6 @@ teardown: - do: headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser Content-Type: application/json indices.create: index: sparse_vector_pruning_test @@ -561,7 +558,6 @@ teardown: - do: headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser Content-Type: application/json indices.get_mapping: index: sparse_vector_pruning_test @@ -581,7 +577,6 @@ teardown: - do: headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser Content-Type: application/json indices.create: index: sparse_vector_pruning_test @@ -597,12 +592,15 @@ teardown: - do: headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. 
the test setup superuser Content-Type: application/json indices.get_mapping: index: sparse_vector_pruning_test - - not_exists: sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options + # the index_options with pruning defaults will be serialized here explicitly + - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.prune: true } + - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_freq_ratio_threshold: 5.0 } + - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_weight_threshold: 0.4 } + --- "Check sparse_vector token pruning index_options prune missing do not allow config": @@ -646,7 +644,6 @@ teardown: - do: catch: /\[index_options\] field \[pruning_config\] should only be set if \[prune\] is set to true/ headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser Content-Type: application/json indices.create: index: sparse_vector_pruning_test @@ -677,7 +674,6 @@ teardown: - do: catch: /\[tokens_freq_ratio_threshold\] must be between \[1\] and \[100\]/ headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser Content-Type: application/json indices.create: index: sparse_vector_pruning_test @@ -708,7 +704,6 @@ teardown: - do: catch: /\[tokens_weight_threshold\] must be between 0 and 1/ headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser Content-Type: application/json indices.create: index: sparse_vector_pruning_test @@ -738,7 +733,6 @@ teardown: - do: headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. 
the test setup superuser Content-Type: application/json indices.create: index: test-sparse-vector-with-pruning @@ -759,7 +753,6 @@ teardown: - do: headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser Content-Type: application/json indices.create: index: test-sparse-vector-without-pruning @@ -777,7 +770,6 @@ teardown: - do: headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser Content-Type: application/json bulk: index: test-sparse-vector-with-pruning @@ -791,7 +783,6 @@ teardown: {"content_embedding":{"is": 0.6891394,"globe": 0.484035,"ocean": 0.080102935,"underground": 0.053516876}} - do: headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser Content-Type: application/json bulk: index: test-sparse-vector-without-pruning @@ -897,7 +888,6 @@ teardown: - do: headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser Content-Type: application/json indices.create: index: test-sparse-vector-pruning-default @@ -911,7 +901,6 @@ teardown: - do: headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser Content-Type: application/json bulk: index: test-sparse-vector-pruning-default From 87ab9dd50eb66264e6fab89e39a8fa7a36c1c530 Mon Sep 17 00:00:00 2001 From: "Mark J. 
Hoy" Date: Thu, 12 Jun 2025 09:36:15 -0400 Subject: [PATCH 14/37] readd auth for teardown --- .../resources/rest-api-spec/test/ml/sparse_vector_search.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml index 9a66a43367ce5..4e968b6db3f57 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml @@ -96,12 +96,14 @@ teardown: - do: headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser Content-Type: application/json indices.delete: index: ["sparse_vector_pruning_test", "test-sparse-vector-without-pruning", "test-sparse-vector-with-pruning"] ignore: 404 - do: headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser Content-Type: application/json indices.refresh: { } From afd01d1cd719e375a0ebc15f58532f908759a69c Mon Sep 17 00:00:00 2001 From: "Mark J. 
Hoy" Date: Mon, 16 Jun 2025 14:54:37 -0400 Subject: [PATCH 15/37] only serialize index options if not default --- .../vectors/SparseVectorFieldMapper.java | 23 +++++++- .../vectors/SparseVectorFieldMapperTests.java | 58 ++++++++++++++++++- 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index cf9918a4af6e6..9c8799a3f2f73 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -101,10 +101,22 @@ public static class Builder extends FieldMapper.Builder { () -> null, (n, c, o) -> parseIndexOptions(c, o), m -> toType(m).fieldType().indexOptions, - XContentBuilder::field, + this::serializeIndexOptions, + // XContentBuilder::field, Objects::toString ).acceptsNull(); + private void serializeIndexOptions(XContentBuilder builder, String name, IndexOptions value) throws IOException { + if (value instanceof IndexOptions serializeIndexOptions) { + if (IndexOptions.isDefaultOptions(serializeIndexOptions)) { + // do not emit anything if it's the default options + return; + } + } + + builder.field(name, value); + } + private final MappingParserContext mappingParserContext; public Builder(String name) { @@ -541,6 +553,15 @@ public static class IndexOptions implements ToXContent { this.pruningConfig = pruningConfig; } + public static boolean isDefaultOptions(IndexOptions indexOptions) { + if (indexOptions == null || indexOptions.prune == null || indexOptions.prune == false || indexOptions.pruningConfig == null) { + return false; + } + + return (indexOptions.pruningConfig.getTokensFreqRatioThreshold() == TokenPruningConfig.DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD + && indexOptions.pruningConfig.getTokensWeightThreshold() == 
TokenPruningConfig.DEFAULT_TOKENS_WEIGHT_THRESHOLD); + } + public Boolean getPrune() { return prune; } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index abc43a686b59b..fef56ef945098 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -87,7 +87,22 @@ protected void minimalMappingWithExplicitIndexOptions(XContentBuilder b) throws b.field("prune", true); b.startObject("pruning_config"); { - b.field("tokens_freq_ratio_threshold", TokenPruningConfig.DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD); + b.field("tokens_freq_ratio_threshold", 3.0f); + b.field("tokens_weight_threshold", 0.5f); + } + b.endObject(); + } + b.endObject(); + } + + protected void serializedMappingWithSomeIndexOptions(XContentBuilder b) throws IOException { + b.field("type", "sparse_vector"); + b.startObject("index_options"); + { + b.field("prune", true); + b.startObject("pruning_config"); + { + b.field("tokens_freq_ratio_threshold", 3.0f); b.field("tokens_weight_threshold", TokenPruningConfig.DEFAULT_TOKENS_WEIGHT_THRESHOLD); } b.endObject(); @@ -95,6 +110,29 @@ protected void minimalMappingWithExplicitIndexOptions(XContentBuilder b) throws b.endObject(); } + protected void minimalMappingWithSomeExplicitIndexOptions(XContentBuilder b) throws IOException { + b.field("type", "sparse_vector"); + b.startObject("index_options"); + { + b.field("prune", true); + b.startObject("pruning_config"); + { + b.field("tokens_freq_ratio_threshold", 3.0f); + } + b.endObject(); + } + b.endObject(); + } + + protected void mappingWithIndexOptionsOnlyPruneTrue(XContentBuilder b) throws IOException { + b.field("type", "sparse_vector"); + b.startObject("index_options"); + { + b.field("prune", true); + } + b.endObject(); + 
} + protected void mappingWithIndexOptionsPruneFalse(XContentBuilder b) throws IOException { b.field("type", "sparse_vector"); b.startObject("index_options"); @@ -133,7 +171,7 @@ private static int getFrequency(TokenStream tk) throws IOException { public void testDefaults() throws Exception { DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); - assertEquals(Strings.toString(fieldMapping(this::minimalMappingWithExplicitIndexOptions)), mapper.mappingSource().toString()); + assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString()); ParsedDocument doc1 = mapper.parse(source(this::writeField)); @@ -159,7 +197,7 @@ public void testDefaults() throws Exception { public void testMappingWithoutIndexOptionsUsesDefaults() throws Exception { DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); - assertEquals(Strings.toString(fieldMapping(this::minimalMappingWithExplicitIndexOptions)), mapper.mappingSource().toString()); + assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString()); IndexVersion preIndexOptionsVersion = IndexVersionUtils.randomVersionBetween( random(), @@ -170,6 +208,20 @@ public void testMappingWithoutIndexOptionsUsesDefaults() throws Exception { assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), previousMapper.mappingSource().toString()); } + public void testMappingWithExplicitIndexOptions() throws Exception { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMappingWithExplicitIndexOptions)); + assertEquals(Strings.toString(fieldMapping(this::minimalMappingWithExplicitIndexOptions)), mapper.mappingSource().toString()); + + mapper = createDocumentMapper(fieldMapping(this::mappingWithIndexOptionsPruneFalse)); + assertEquals(Strings.toString(fieldMapping(this::mappingWithIndexOptionsPruneFalse)), mapper.mappingSource().toString()); + + mapper = 
createDocumentMapper(fieldMapping(this::minimalMappingWithSomeExplicitIndexOptions)); + assertEquals(Strings.toString(fieldMapping(this::serializedMappingWithSomeIndexOptions)), mapper.mappingSource().toString()); + + mapper = createDocumentMapper(fieldMapping(this::mappingWithIndexOptionsOnlyPruneTrue)); + assertEquals(Strings.toString(fieldMapping(this::mappingWithIndexOptionsOnlyPruneTrue)), mapper.mappingSource().toString()); + } + public void testDotInFieldName() throws Exception { DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); ParsedDocument parsedDocument = mapper.parse(source(b -> b.field("field", Map.of("foo.bar", 10, "foobar", 20)))); From 3f4801be219ef437a87f113d09252d24f187c784 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 16 Jun 2025 19:03:04 +0000 Subject: [PATCH 16/37] [CI] Auto commit changes from spotless --- .../index/mapper/vectors/SparseVectorFieldMapperTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index fef56ef945098..ef13b5888faf1 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -220,7 +220,7 @@ public void testMappingWithExplicitIndexOptions() throws Exception { mapper = createDocumentMapper(fieldMapping(this::mappingWithIndexOptionsOnlyPruneTrue)); assertEquals(Strings.toString(fieldMapping(this::mappingWithIndexOptionsOnlyPruneTrue)), mapper.mappingSource().toString()); - } + } public void testDotInFieldName() throws Exception { DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); From 6307f93ddf9cb8a6aceb608796a978d50f72e1d7 Mon Sep 17 00:00:00 2001 From: "Mark J. 
Hoy" Date: Mon, 16 Jun 2025 20:18:14 -0400 Subject: [PATCH 17/37] serialization refactor; pass index version around --- .../vectors/SparseVectorFieldMapper.java | 172 ++++++++---------- .../vectors/SparseVectorFieldMapperTests.java | 27 ++- .../vectors/SparseVectorFieldTypeTests.java | 7 +- .../mapper/SemanticTextFieldMapper.java | 4 +- 4 files changed, 107 insertions(+), 103 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index 9c8799a3f2f73..e62aac5401405 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -88,45 +88,29 @@ public class SparseVectorFieldMapper extends FieldMapper { public static final NodeFeature SPARSE_VECTOR_INDEX_OPTIONS_FEATURE = new NodeFeature("sparse_vector.index_options_supported"); + private final IndexVersion indexVersion; + private static SparseVectorFieldMapper toType(FieldMapper in) { return (SparseVectorFieldMapper) in; } public static class Builder extends FieldMapper.Builder { + private final IndexVersion indexVersion; + private final Parameter stored = Parameter.storeParam(m -> toType(m).fieldType().isStored(), false); private final Parameter> meta = Parameter.metaParam(); - private final Parameter indexOptions = new Parameter<>( - SPARSE_VECTOR_INDEX_OPTIONS, + private final Parameter indexOptions = new Parameter<>(SPARSE_VECTOR_INDEX_OPTIONS, true, () -> null, (n, c, o) -> parseIndexOptions(c, o), m -> toType(m).fieldType().indexOptions, - this::serializeIndexOptions, - // XContentBuilder::field, + XContentBuilder::field, Objects::toString - ).acceptsNull(); - - private void serializeIndexOptions(XContentBuilder builder, String name, IndexOptions value) throws IOException { - if (value instanceof IndexOptions 
serializeIndexOptions) { - if (IndexOptions.isDefaultOptions(serializeIndexOptions)) { - // do not emit anything if it's the default options - return; - } - } - - builder.field(name, value); - } - - private final MappingParserContext mappingParserContext; + ).acceptsNull().setSerializerCheck(this::indexOptionsSerializerCheck); - public Builder(String name) { + public Builder(String name, @Nullable IndexVersion indexVersion) { super(name); - mappingParserContext = null; - } - - public Builder(String name, @Nullable MappingParserContext mappingParserContext) { - super(name); - this.mappingParserContext = mappingParserContext; + this.indexVersion = indexVersion; } public Builder setStored(boolean value) { @@ -142,18 +126,27 @@ protected Parameter[] getParameters() { @Override public SparseVectorFieldMapper build(MapperBuilderContext context) { IndexOptions builderIndexOptions = indexOptions.getValue(); - if (builderIndexOptions == null - && mappingParserContext != null - && shouldHaveDefaultPruningConfig(mappingParserContext.indexVersionCreated())) { + if (builderIndexOptions == null && indexVersion != null && indexVersionSupportsDefaultPruningConfig(indexVersion)) { builderIndexOptions = new IndexOptions(true, new TokenPruningConfig()); } return new SparseVectorFieldMapper( leafName(), - new SparseVectorFieldType(context.buildFullName(leafName()), stored.getValue(), meta.getValue(), builderIndexOptions), - builderParams(this, context) + new SparseVectorFieldType( + indexVersion, + context.buildFullName(leafName()), + stored.getValue(), + meta.getValue(), + builderIndexOptions + ), + builderParams(this, context), + indexVersion ); } + + private boolean indexOptionsSerializerCheck(boolean includeDefaults, boolean isConfigured, IndexOptions value) { + return includeDefaults || (value == null || IndexOptions.isDefaultOptions(value, indexVersion)) == false; + } } public IndexOptions getIndexOptions() { @@ -177,12 +170,8 @@ private static 
SparseVectorFieldMapper.IndexOptions parseIndexOptions(MappingPar Map indexOptionsMap = XContentMapValues.nodeMapValue(propNode, SPARSE_VECTOR_INDEX_OPTIONS); - XContentParser parser = new MapXContentParser( - NamedXContentRegistry.EMPTY, - DeprecationHandler.IGNORE_DEPRECATIONS, - indexOptionsMap, - XContentType.JSON - ); + XContentParser parser = + new MapXContentParser(NamedXContentRegistry.EMPTY, DeprecationHandler.IGNORE_DEPRECATIONS, indexOptionsMap, XContentType.JSON); try { return INDEX_OPTIONS_PARSER.parse(parser, null); @@ -198,24 +187,29 @@ private static SparseVectorFieldMapper.IndexOptions parseIndexOptions(MappingPar throw new IllegalArgumentException(ERROR_MESSAGE_8X); } - return new Builder(n, c); + return new Builder(n, c.indexVersionCreated()); }, notInMultiFields(CONTENT_TYPE)); public static final class SparseVectorFieldType extends MappedFieldType { private final IndexOptions indexOptions; - public SparseVectorFieldType(String name, boolean isStored, Map meta) { - this(name, isStored, meta, null); + public SparseVectorFieldType(IndexVersion indexVersion, String name, boolean isStored, Map meta) { + this(indexVersion, name, isStored, meta, null); } public SparseVectorFieldType( + IndexVersion indexVersion, String name, boolean isStored, Map meta, @Nullable SparseVectorFieldMapper.IndexOptions indexOptions ) { super(name, true, isStored, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); - this.indexOptions = indexOptions; + this.indexOptions = indexOptions != null + ? indexOptions + : SparseVectorFieldMapper.indexVersionSupportsDefaultPruningConfig(indexVersion) + ? 
new IndexOptions(true, new TokenPruningConfig()) + : null; } public IndexOptions getIndexOptions() { @@ -269,35 +263,27 @@ public Query finalizeSparseVectorQuery( Boolean shouldPruneTokensFromQuery, TokenPruningConfig tokenPruningConfigFromQuery ) throws IOException { - TokenPruningConfig pruningConfig = null; - - if (shouldPruneTokensFromQuery != null) { - // if this is not null, the query is overriding the index config - pruningConfig = shouldPruneTokensFromQuery ? tokenPruningConfigFromQuery : null; - } else { - // check and see if we explicitly do not prune in the index_options - boolean explicitlyDoNotPrune = this.indexOptions != null - && this.indexOptions.prune != null - && this.indexOptions.prune == false; - - if (explicitlyDoNotPrune == false) { - // get the explicit pruning config from the index_options if available - pruningConfig = this.indexOptions != null ? this.indexOptions.pruningConfig : null; - - // if we're still null, set the default based on the index version - // newer index versions default to true, while older is false - pruningConfig = pruningConfig == null ? getDefaultPruningConfig(context) : pruningConfig; + Boolean shouldPruneTokens = shouldPruneTokensFromQuery; + TokenPruningConfig tokenPruningConfig = tokenPruningConfigFromQuery; + + if (indexOptions != null) { + if (shouldPruneTokens == null && indexOptions.prune != null) { + shouldPruneTokens = indexOptions.prune; } - } - return (pruningConfig != null) - ? WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, pruningConfig, queryVectors, this, context) - : WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, queryVectors, this, context); - } + if (tokenPruningConfig == null && indexOptions.pruningConfig != null) { + tokenPruningConfig = indexOptions.pruningConfig; + } + } - private TokenPruningConfig getDefaultPruningConfig(SearchExecutionContext context) { - IndexVersion indexVersion = context.indexVersionCreated(); - return (shouldHaveDefaultPruningConfig(indexVersion)) ? 
new TokenPruningConfig() : null; + return (shouldPruneTokens != null && shouldPruneTokens) + ? WeightedTokensUtils.queryBuilderWithPrunedTokens( + fieldName, + tokenPruningConfig == null ? new TokenPruningConfig() : tokenPruningConfig, + queryVectors, + this, + context + ) : WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, queryVectors, this, context); } private static String indexedValueForSearch(Object value) { @@ -308,8 +294,14 @@ private static String indexedValueForSearch(Object value) { } } - private SparseVectorFieldMapper(String simpleName, MappedFieldType mappedFieldType, BuilderParams builderParams) { + private SparseVectorFieldMapper( + String simpleName, + MappedFieldType mappedFieldType, + BuilderParams builderParams, + IndexVersion indexVersion + ) { super(simpleName, mappedFieldType, builderParams); + this.indexVersion = indexVersion; } @Override @@ -327,7 +319,7 @@ public Map indexAnalyzers() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), null).init(this); + return new Builder(leafName(), indexVersion).init(this); } @Override @@ -352,8 +344,7 @@ public void parse(DocumentParserContext context) throws IOException { if (context.parser().currentToken() != Token.START_OBJECT) { throw new IllegalArgumentException( - "[sparse_vector] fields must be json objects, expected a START_OBJECT but got: " + context.parser().currentToken() - ); + "[sparse_vector] fields must be json objects, expected a START_OBJECT but got: " + context.parser().currentToken()); } final boolean isWithinLeaf = context.path().isWithinLeafObject(); @@ -381,10 +372,8 @@ public void parse(DocumentParserContext context) throws IOException { } } else { throw new IllegalArgumentException( - "[sparse_vector] fields take hashes that map a feature to a strictly positive " - + "float, but got unexpected token " - + token - ); + "[sparse_vector] fields take hashes that map a feature to a strictly positive " + "float, but got unexpected token " 
+ + token); } } if (context.indexSettings().getIndexVersionCreated().onOrAfter(SPARSE_VECTOR_IN_FIELD_NAMES_INDEX_VERSION)) { @@ -405,18 +394,12 @@ protected String contentType() { return CONTENT_TYPE; } - private static boolean shouldHaveDefaultPruningConfig(IndexVersion indexVersion) { - if (indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION)) { - // default pruning for 9.1.0+ is true for this index - return true; - } - - // default pruning for 8.19.0+ is true for this index - if (indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0)) { - return true; - } - - return false; + private static boolean indexVersionSupportsDefaultPruningConfig(IndexVersion indexVersion) { + // default pruning for 9.1.0+ or 8.19.0+ is true for this index + return ( + indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION) || + indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0) + ); } private static class SparseVectorValueFetcher implements ValueFetcher { @@ -539,21 +522,19 @@ public static class IndexOptions implements ToXContent { IndexOptions(@Nullable Boolean prune, @Nullable TokenPruningConfig pruningConfig) { if (pruningConfig != null && (prune == null || prune == false)) { throw new IllegalArgumentException( - "[" - + SPARSE_VECTOR_INDEX_OPTIONS - + "] field [" - + PRUNING_CONFIG_FIELD_NAME.getPreferredName() - + "] should only be set if [" - + PRUNE_FIELD_NAME.getPreferredName() - + "] is set to true" - ); + "[" + SPARSE_VECTOR_INDEX_OPTIONS + "] field [" + PRUNING_CONFIG_FIELD_NAME.getPreferredName() + + "] should only be set if [" + PRUNE_FIELD_NAME.getPreferredName() + "] is set to true"); } this.prune = prune; this.pruningConfig = pruningConfig; } - public static boolean isDefaultOptions(IndexOptions indexOptions) { + public static boolean isDefaultOptions(IndexOptions indexOptions, IndexVersion indexVersion) { + if 
(indexVersionSupportsDefaultPruningConfig(indexVersion) == false) { + return indexOptions == null; + } + if (indexOptions == null || indexOptions.prune == null || indexOptions.prune == false || indexOptions.pruningConfig == null) { return false; } @@ -604,5 +585,4 @@ public final int hashCode() { return Objects.hash(prune, pruningConfig); } } - } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index ef13b5888faf1..a7977250bf65d 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -629,7 +629,7 @@ public void testTypeQueryFinalizationDefaultsCurrentVersion() throws Exception { MapperService mapperService = createMapperService(version, fieldMapping(this::minimalMapping)); // query should be pruned by default on newer index versions - performTypeQueryFinalizationTest(mapperService, null, null, null, true); + performTypeQueryFinalizationTest(version, mapperService, null, null, null, true); } public void testTypeQueryFinalizationDefaultsPreviousVersion() throws Exception { @@ -641,7 +641,7 @@ public void testTypeQueryFinalizationDefaultsPreviousVersion() throws Exception MapperService mapperService = createMapperService(version, fieldMapping(this::minimalMapping)); // query should _not_ be pruned by default on older index versions - performTypeQueryFinalizationTest(mapperService, null, null, null, false); + performTypeQueryFinalizationTest(version, mapperService, null, null, null, false); } public void testTypeQueryFinalizationWithIndexExplicit() throws Exception { @@ -650,6 +650,7 @@ public void testTypeQueryFinalizationWithIndexExplicit() throws Exception { // query should be pruned via explicit index options performTypeQueryFinalizationTest( + version, 
mapperService, new SparseVectorFieldMapper.IndexOptions(true, new TokenPruningConfig()), null, @@ -663,7 +664,14 @@ public void testTypeQueryFinalizationWithIndexExplicitDoNotPrune() throws Except MapperService mapperService = createMapperService(version, fieldMapping(this::mappingWithIndexOptionsPruneFalse)); // query should be pruned via explicit index options - performTypeQueryFinalizationTest(mapperService, new SparseVectorFieldMapper.IndexOptions(false, null), null, null, false); + performTypeQueryFinalizationTest( + version, + mapperService, + new SparseVectorFieldMapper.IndexOptions(false, null), + null, + null, + false + ); } public void testTypeQueryFinalizationQueryOverridesPruning() throws Exception { @@ -672,6 +680,7 @@ public void testTypeQueryFinalizationQueryOverridesPruning() throws Exception { // query should still be pruned due to query builder setting it performTypeQueryFinalizationTest( + version, mapperService, new SparseVectorFieldMapper.IndexOptions(false, null), true, @@ -686,6 +695,7 @@ public void testTypeQueryFinalizationQueryOverridesPruningOff() throws Exception // query should not pruned due to query builder setting it performTypeQueryFinalizationTest( + version, mapperService, new SparseVectorFieldMapper.IndexOptions(true, new TokenPruningConfig()), false, @@ -695,6 +705,7 @@ public void testTypeQueryFinalizationQueryOverridesPruningOff() throws Exception } private void performTypeQueryFinalizationTest( + IndexVersion indexVersion, MapperService mapperService, SparseVectorFieldMapper.IndexOptions indexOptions, @Nullable Boolean queryPrune, @@ -703,6 +714,7 @@ private void performTypeQueryFinalizationTest( ) throws IOException { withSearchExecutionContext(mapperService, (context) -> { SparseVectorFieldMapper.SparseVectorFieldType ft = new SparseVectorFieldMapper.SparseVectorFieldType( + indexVersion, "field", false, Collections.emptyMap(), @@ -776,7 +788,14 @@ private void runTestTypeQueryFinalization( } try { - 
performTypeQueryFinalizationTest(mapperService, indexOptions, shouldQueryPrune, queryPruningConfig, resultShouldBePruned); + performTypeQueryFinalizationTest( + version, + mapperService, + indexOptions, + shouldQueryPrune, + queryPruningConfig, + resultShouldBePruned + ); } catch (AssertionError e) { String message = "performTypeQueryFinalizationTest failed using parameters: " + "usePreviousIndex: " diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java index 0dbe3817c3e87..ecf0936be28b2 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java @@ -9,6 +9,7 @@ package org.elasticsearch.index.mapper.vectors; +import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.mapper.FieldTypeTestCase; import org.elasticsearch.index.mapper.MappedFieldType; @@ -18,13 +19,15 @@ public class SparseVectorFieldTypeTests extends FieldTypeTestCase { public void testDocValuesDisabled() { - MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType("field", false, Collections.emptyMap()); + IndexVersion indexVersion = IndexVersion.current(); + MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType(indexVersion, "field", false, Collections.emptyMap()); assertFalse(fieldType.hasDocValues()); expectThrows(IllegalArgumentException.class, () -> fieldType.fielddataBuilder(FieldDataContext.noRuntimeFields("test"))); } public void testIsNotAggregatable() { - MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType("field", false, Collections.emptyMap()); + IndexVersion indexVersion = IndexVersion.current(); + MappedFieldType fieldType = new 
SparseVectorFieldMapper.SparseVectorFieldType(indexVersion, "field", false, Collections.emptyMap()); assertFalse(fieldType.isAggregatable()); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 92337c8e7fc8d..55f172e34fdd4 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -1077,7 +1077,9 @@ private static Mapper.Builder createEmbeddingsField( boolean useLegacyFormat ) { return switch (modelSettings.taskType()) { - case SPARSE_EMBEDDING -> new SparseVectorFieldMapper.Builder(CHUNKED_EMBEDDINGS_FIELD).setStored(useLegacyFormat == false); + case SPARSE_EMBEDDING -> new SparseVectorFieldMapper + .Builder(CHUNKED_EMBEDDINGS_FIELD, indexVersionCreated) + .setStored(useLegacyFormat == false); case TEXT_EMBEDDING -> { DenseVectorFieldMapper.Builder denseVectorMapperBuilder = new DenseVectorFieldMapper.Builder( CHUNKED_EMBEDDINGS_FIELD, From fd61d59e498f5b89cf21ef3b5030030ea4c477d3 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 17 Jun 2025 00:28:12 +0000 Subject: [PATCH 18/37] [CI] Auto commit changes from spotless --- .../xpack/inference/mapper/SemanticTextFieldMapper.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 55f172e34fdd4..40cbf918f768d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -1077,9 +1077,9 @@ private static Mapper.Builder createEmbeddingsField( boolean useLegacyFormat ) { return switch (modelSettings.taskType()) { - case SPARSE_EMBEDDING -> new SparseVectorFieldMapper - .Builder(CHUNKED_EMBEDDINGS_FIELD, indexVersionCreated) - .setStored(useLegacyFormat == false); + case SPARSE_EMBEDDING -> new SparseVectorFieldMapper.Builder(CHUNKED_EMBEDDINGS_FIELD, indexVersionCreated).setStored( + useLegacyFormat == false + ); case TEXT_EMBEDDING -> { DenseVectorFieldMapper.Builder denseVectorMapperBuilder = new DenseVectorFieldMapper.Builder( CHUNKED_EMBEDDINGS_FIELD, From d8128211ed9f5122db3a600ae49ae37ddcfdeca8 Mon Sep 17 00:00:00 2001 From: "Mark J. Hoy" Date: Mon, 16 Jun 2025 20:41:02 -0400 Subject: [PATCH 19/37] fix transport versions merge --- .../src/main/java/org/elasticsearch/TransportVersions.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 5f965fd9c229d..a7fe5c7b38057 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -300,11 +300,8 @@ static TransportVersion def(int id) { public static final TransportVersion NONE_CHUNKING_STRATEGY = def(9_097_0_00); public static final TransportVersion PROJECT_DELETION_GLOBAL_BLOCK = def(9_098_0_00); public static final TransportVersion SECURITY_CLOUD_API_KEY_REALM_AND_TYPE = def(9_099_0_00); -<<<<<<< markjhoy/add_sparse_vector_token_pruning_index_options - public static final TransportVersion SPARSE_VECTOR_FIELD_PRUNING_OPTIONS = def(9_100_0_00); -======= public static final TransportVersion STATE_PARAM_GET_SNAPSHOT = def(9_100_0_00); ->>>>>>> main + public static final TransportVersion SPARSE_VECTOR_FIELD_PRUNING_OPTIONS = def(9_101_0_00); 
/* * STOP! READ THIS FIRST! No, really, From b7c990438d5717284c07209727d5497a1e5d2351 Mon Sep 17 00:00:00 2001 From: "Mark J. Hoy" Date: Mon, 16 Jun 2025 20:47:52 -0400 Subject: [PATCH 20/37] fix up docs --- .../elasticsearch/mapping-reference/sparse-vector.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/reference/elasticsearch/mapping-reference/sparse-vector.md b/docs/reference/elasticsearch/mapping-reference/sparse-vector.md index 54f939773c07a..d24b2cc7e1a02 100644 --- a/docs/reference/elasticsearch/mapping-reference/sparse-vector.md +++ b/docs/reference/elasticsearch/mapping-reference/sparse-vector.md @@ -24,6 +24,7 @@ PUT my-index } ``` +{applies_to}`stack: preview 9.1` With any new indices created, token pruning will be turned on by default with appropriate defaults. You can control this behaviour using the optional `index_options` parameters for the field: ```console @@ -58,8 +59,8 @@ The following parameters are accepted by `sparse_vector` fields: * Exclude the field from [_source](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#source-filtering). * Use [synthetic `_source`](/reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source). -index_options -: (Optional, object) You can set index options for your `sparse_vector` field to determine if you should prune tokens, and the parameter configurations for the token pruning. If pruning options are not set in your `sparse_query` vector, Elasticsearch will use the default options configured for the field, if any. The available options for the index options are: +index_options {applies_to}`stack: preview 9.1` +: (Optional, object) You can set index options for your `sparse_vector` field to determine if you should prune tokens, and the parameter configurations for the token pruning. 
If pruning options are not set in your [`sparse_vector` query](/reference/query-languages/query-dsl/query-dsl-sparse-vector-query.md), Elasticsearch will use the default options configured for the field, if any. Parameters for `index_options` are: @@ -87,7 +88,9 @@ Non-significant tokens can be defined as tokens that meet both of the following * The weight/score is so low that the token is likely not very relevant to the original term Both the token frequency threshold and weight threshold must show the token is non-significant in order for the token to be pruned. -This ensures the tokens that are kept are frequent enough and have very high scoring or very infrequent tokens that may not have as high of a score. +This ensures that: +* The tokens that are kept are frequent enough and have significant scoring. +* Very infrequent tokens that may not have as high of a score are removed. ## Multi-value sparse vectors [index-multi-value-sparse-vectors] From 350910c6a1131e350bdb9e4c7ec44d80c94dcfd1 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 17 Jun 2025 00:57:33 +0000 Subject: [PATCH 21/37] [CI] Auto commit changes from spotless --- .../vectors/SparseVectorFieldMapper.java | 56 +++++++++++-------- .../vectors/SparseVectorFieldMapperTests.java | 9 +-- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index e62aac5401405..3d9212e5282c1 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -99,7 +99,8 @@ public static class Builder extends FieldMapper.Builder { private final Parameter stored = Parameter.storeParam(m -> toType(m).fieldType().isStored(), false); private final Parameter> meta = Parameter.metaParam(); - private 
final Parameter indexOptions = new Parameter<>(SPARSE_VECTOR_INDEX_OPTIONS, + private final Parameter indexOptions = new Parameter<>( + SPARSE_VECTOR_INDEX_OPTIONS, true, () -> null, (n, c, o) -> parseIndexOptions(c, o), @@ -170,8 +171,12 @@ private static SparseVectorFieldMapper.IndexOptions parseIndexOptions(MappingPar Map indexOptionsMap = XContentMapValues.nodeMapValue(propNode, SPARSE_VECTOR_INDEX_OPTIONS); - XContentParser parser = - new MapXContentParser(NamedXContentRegistry.EMPTY, DeprecationHandler.IGNORE_DEPRECATIONS, indexOptionsMap, XContentType.JSON); + XContentParser parser = new MapXContentParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.IGNORE_DEPRECATIONS, + indexOptionsMap, + XContentType.JSON + ); try { return INDEX_OPTIONS_PARSER.parse(parser, null); @@ -205,11 +210,10 @@ public SparseVectorFieldType( @Nullable SparseVectorFieldMapper.IndexOptions indexOptions ) { super(name, true, isStored, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); - this.indexOptions = indexOptions != null - ? indexOptions + this.indexOptions = indexOptions != null ? indexOptions : SparseVectorFieldMapper.indexVersionSupportsDefaultPruningConfig(indexVersion) ? new IndexOptions(true, new TokenPruningConfig()) - : null; + : null; } public IndexOptions getIndexOptions() { @@ -277,13 +281,14 @@ public Query finalizeSparseVectorQuery( } return (shouldPruneTokens != null && shouldPruneTokens) - ? WeightedTokensUtils.queryBuilderWithPrunedTokens( - fieldName, - tokenPruningConfig == null ? new TokenPruningConfig() : tokenPruningConfig, - queryVectors, - this, - context - ) : WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, queryVectors, this, context); + ? WeightedTokensUtils.queryBuilderWithPrunedTokens( + fieldName, + tokenPruningConfig == null ? 
new TokenPruningConfig() : tokenPruningConfig, + queryVectors, + this, + context + ) + : WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, queryVectors, this, context); } private static String indexedValueForSearch(Object value) { @@ -344,7 +349,8 @@ public void parse(DocumentParserContext context) throws IOException { if (context.parser().currentToken() != Token.START_OBJECT) { throw new IllegalArgumentException( - "[sparse_vector] fields must be json objects, expected a START_OBJECT but got: " + context.parser().currentToken()); + "[sparse_vector] fields must be json objects, expected a START_OBJECT but got: " + context.parser().currentToken() + ); } final boolean isWithinLeaf = context.path().isWithinLeafObject(); @@ -372,8 +378,10 @@ public void parse(DocumentParserContext context) throws IOException { } } else { throw new IllegalArgumentException( - "[sparse_vector] fields take hashes that map a feature to a strictly positive " + "float, but got unexpected token " - + token); + "[sparse_vector] fields take hashes that map a feature to a strictly positive " + + "float, but got unexpected token " + + token + ); } } if (context.indexSettings().getIndexVersionCreated().onOrAfter(SPARSE_VECTOR_IN_FIELD_NAMES_INDEX_VERSION)) { @@ -396,10 +404,8 @@ protected String contentType() { private static boolean indexVersionSupportsDefaultPruningConfig(IndexVersion indexVersion) { // default pruning for 9.1.0+ or 8.19.0+ is true for this index - return ( - indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION) || - indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0) - ); + return (indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION) + || indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0)); } private static class SparseVectorValueFetcher implements ValueFetcher { @@ -522,8 +528,14 @@ public static class IndexOptions implements 
ToXContent { IndexOptions(@Nullable Boolean prune, @Nullable TokenPruningConfig pruningConfig) { if (pruningConfig != null && (prune == null || prune == false)) { throw new IllegalArgumentException( - "[" + SPARSE_VECTOR_INDEX_OPTIONS + "] field [" + PRUNING_CONFIG_FIELD_NAME.getPreferredName() - + "] should only be set if [" + PRUNE_FIELD_NAME.getPreferredName() + "] is set to true"); + "[" + + SPARSE_VECTOR_INDEX_OPTIONS + + "] field [" + + PRUNING_CONFIG_FIELD_NAME.getPreferredName() + + "] should only be set if [" + + PRUNE_FIELD_NAME.getPreferredName() + + "] is set to true" + ); } this.prune = prune; diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index a7977250bf65d..24059ab653214 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -664,14 +664,7 @@ public void testTypeQueryFinalizationWithIndexExplicitDoNotPrune() throws Except MapperService mapperService = createMapperService(version, fieldMapping(this::mappingWithIndexOptionsPruneFalse)); // query should be pruned via explicit index options - performTypeQueryFinalizationTest( - version, - mapperService, - new SparseVectorFieldMapper.IndexOptions(false, null), - null, - null, - false - ); + performTypeQueryFinalizationTest(version, mapperService, new SparseVectorFieldMapper.IndexOptions(false, null), null, null, false); } public void testTypeQueryFinalizationQueryOverridesPruning() throws Exception { From aeec0ca5e27970ac182a6832480dd538d02dcfdc Mon Sep 17 00:00:00 2001 From: "Mark J. 
Hoy" Date: Tue, 17 Jun 2025 12:05:23 -0400 Subject: [PATCH 22/37] fix docs; add include_defaults unit and yaml test --- .../mapping-reference/sparse-vector.md | 6 +++- .../vectors/SparseVectorFieldMapper.java | 6 +++- .../vectors/SparseVectorFieldMapperTests.java | 29 +++++++++++++++++++ .../test/ml/sparse_vector_search.yml | 10 ++++--- 4 files changed, 45 insertions(+), 6 deletions(-) diff --git a/docs/reference/elasticsearch/mapping-reference/sparse-vector.md b/docs/reference/elasticsearch/mapping-reference/sparse-vector.md index d24b2cc7e1a02..98d2edc097575 100644 --- a/docs/reference/elasticsearch/mapping-reference/sparse-vector.md +++ b/docs/reference/elasticsearch/mapping-reference/sparse-vector.md @@ -24,7 +24,11 @@ PUT my-index } ``` -{applies_to}`stack: preview 9.1` +## Token pruning +```{applies_to} +stack: preview 9.1 +``` + With any new indices created, token pruning will be turned on by default with appropriate defaults. You can control this behaviour using the optional `index_options` parameters for the field: ```console diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index 3d9212e5282c1..fbbc37e952966 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -146,7 +146,11 @@ public SparseVectorFieldMapper build(MapperBuilderContext context) { } private boolean indexOptionsSerializerCheck(boolean includeDefaults, boolean isConfigured, IndexOptions value) { - return includeDefaults || (value == null || IndexOptions.isDefaultOptions(value, indexVersion)) == false; + return ( + indexVersionSupportsDefaultPruningConfig(indexVersion) && ( + includeDefaults || (value == null || IndexOptions.isDefaultOptions(value, indexVersion)) == false + ) + ); } } diff --git 
a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index 24059ab653214..4242561e4bda7 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -40,6 +40,7 @@ import org.elasticsearch.test.index.IndexVersionUtils; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParseException; +import org.elasticsearch.xcontent.json.JsonXContent; import org.hamcrest.Matchers; import org.junit.AssumptionViolatedException; @@ -80,6 +81,21 @@ protected void minimalMapping(XContentBuilder b) throws IOException { b.field("type", "sparse_vector"); } + protected void minimalMappingWithExplicitDefaults(XContentBuilder b) throws IOException { + b.field("type", "sparse_vector"); + b.startObject("index_options"); + { + b.field("prune", true); + b.startObject("pruning_config"); + { + b.field("tokens_freq_ratio_threshold", TokenPruningConfig.DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD); + b.field("tokens_weight_threshold", TokenPruningConfig.DEFAULT_TOKENS_WEIGHT_THRESHOLD); + } + b.endObject(); + } + b.endObject(); + } + protected void minimalMappingWithExplicitIndexOptions(XContentBuilder b) throws IOException { b.field("type", "sparse_vector"); b.startObject("index_options"); @@ -195,6 +211,19 @@ public void testDefaults() throws Exception { assertTrue(freq1 < freq2); } + public void testDefaultsWithIncludeDefaults() throws Exception { + XContentBuilder orig = JsonXContent.contentBuilder().startObject(); + createMapperService(fieldMapping(this::minimalMapping)).documentMapper().mapping().toXContent(orig, INCLUDE_DEFAULTS); + orig.endObject(); + + XContentBuilder withDefaults = JsonXContent.contentBuilder().startObject(); + 
createMapperService(fieldMapping(this::minimalMappingWithExplicitDefaults)).documentMapper().mapping() + .toXContent(withDefaults, INCLUDE_DEFAULTS); + withDefaults.endObject(); + + assertEquals(Strings.toString(withDefaults), Strings.toString(orig)); + } + public void testMappingWithoutIndexOptionsUsesDefaults() throws Exception { DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString()); diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml index 4e968b6db3f57..846ba3289f902 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml @@ -595,13 +595,15 @@ teardown: - do: headers: Content-Type: application/json - indices.get_mapping: + indices.get_field_mapping: index: sparse_vector_pruning_test + fields: ml.tokens + include_defaults: true # the index_options with pruning defaults will be serialized here explicitly - - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.prune: true } - - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_freq_ratio_threshold: 5.0 } - - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_weight_threshold: 0.4 } + - match: { sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.prune: true } + - match: { sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.pruning_config.tokens_freq_ratio_threshold: 5.0 } + - match: { sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.pruning_config.tokens_weight_threshold: 0.4 } --- 
From 7a68727555387bbb2e7e9db5357637810f920708 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 17 Jun 2025 16:16:45 +0000 Subject: [PATCH 23/37] [CI] Auto commit changes from spotless --- .../index/mapper/vectors/SparseVectorFieldMapper.java | 7 ++----- .../index/mapper/vectors/SparseVectorFieldMapperTests.java | 3 ++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index fbbc37e952966..4174c5fb6251e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -146,11 +146,8 @@ public SparseVectorFieldMapper build(MapperBuilderContext context) { } private boolean indexOptionsSerializerCheck(boolean includeDefaults, boolean isConfigured, IndexOptions value) { - return ( - indexVersionSupportsDefaultPruningConfig(indexVersion) && ( - includeDefaults || (value == null || IndexOptions.isDefaultOptions(value, indexVersion)) == false - ) - ); + return (indexVersionSupportsDefaultPruningConfig(indexVersion) + && (includeDefaults || (value == null || IndexOptions.isDefaultOptions(value, indexVersion)) == false)); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index 4242561e4bda7..90f984545a9bb 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -217,7 +217,8 @@ public void testDefaultsWithIncludeDefaults() throws Exception { orig.endObject(); XContentBuilder withDefaults = JsonXContent.contentBuilder().startObject(); - 
createMapperService(fieldMapping(this::minimalMappingWithExplicitDefaults)).documentMapper().mapping() + createMapperService(fieldMapping(this::minimalMappingWithExplicitDefaults)).documentMapper() + .mapping() .toXContent(withDefaults, INCLUDE_DEFAULTS); withDefaults.endObject(); From 7f40f95dec06d7055e6596c1ff64316026711b22 Mon Sep 17 00:00:00 2001 From: "Mark J. Hoy" Date: Tue, 17 Jun 2025 16:04:16 -0400 Subject: [PATCH 24/37] override getIndexReaderManager for SemanticQueryBuilderTests --- .../queries/SemanticQueryBuilderTests.java | 61 +++++++++++++------ 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java index e0f8d51820201..fe5fbc7fe0eb0 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java @@ -9,6 +9,10 @@ import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatDocValuesField; +import org.apache.lucene.document.TextField; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; @@ -17,6 +21,7 @@ import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.join.ScoreMode; +import org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionRequest; import org.elasticsearch.action.ActionType; @@ -30,6 +35,7 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import 
org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.Nullable; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; import org.elasticsearch.index.mapper.MapperService; @@ -99,6 +105,7 @@ public class SemanticQueryBuilderTests extends AbstractQueryTestCase { - assertThat(c.occur(), equalTo(SHOULD)); - assertThat(c.query(), instanceOf(BoostQuery.class)); - assertThat(((BoostQuery) c.query()).getBoost(), equalTo(TOKEN_WEIGHT)); - }); + // no clauses as tokens would be pruned + assertThat(innerBooleanQuery.clauses().size(), equalTo(0)); } private void assertTextEmbeddingLuceneQuery(Query query) { @@ -376,18 +395,7 @@ private static SourceToParse buildSemanticTextFieldWithInferenceResults( DenseVectorFieldMapper.ElementType denseVectorElementType, boolean useLegacyFormat ) throws IOException { - var modelSettings = switch (inferenceResultType) { - case NONE -> null; - case SPARSE_EMBEDDING -> new MinimalServiceSettings("my-service", TaskType.SPARSE_EMBEDDING, null, null, null); - case TEXT_EMBEDDING -> new MinimalServiceSettings( - "my-service", - TaskType.TEXT_EMBEDDING, - TEXT_EMBEDDING_DIMENSION_COUNT, - // l2_norm similarity is required for bit embeddings - denseVectorElementType == DenseVectorFieldMapper.ElementType.BIT ? 
SimilarityMeasure.L2_NORM : SimilarityMeasure.COSINE, - denseVectorElementType - ); - }; + var modelSettings = getModelSettingsForInferenceResultType(inferenceResultType, denseVectorElementType); SourceToParse sourceToParse = null; if (modelSettings != null) { @@ -414,6 +422,23 @@ private static SourceToParse buildSemanticTextFieldWithInferenceResults( return sourceToParse; } + private static MinimalServiceSettings getModelSettingsForInferenceResultType( + InferenceResultType inferenceResultType, @Nullable DenseVectorFieldMapper.ElementType denseVectorElementType + ) { + return switch (inferenceResultType) { + case NONE -> null; + case SPARSE_EMBEDDING -> new MinimalServiceSettings("my-service", TaskType.SPARSE_EMBEDDING, null, null, null); + case TEXT_EMBEDDING -> new MinimalServiceSettings( + "my-service", + TaskType.TEXT_EMBEDDING, + TEXT_EMBEDDING_DIMENSION_COUNT, + // l2_norm similarity is required for bit embeddings + denseVectorElementType == DenseVectorFieldMapper.ElementType.BIT ? 
SimilarityMeasure.L2_NORM : SimilarityMeasure.COSINE, + denseVectorElementType + ); + }; + } + public static class FakeMlPlugin extends Plugin { @Override public List getNamedWriteables() { From 67e1f8d3a99dd93785873f318872e0fbcff9649d Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 17 Jun 2025 20:14:21 +0000 Subject: [PATCH 25/37] [CI] Auto commit changes from spotless --- .../xpack/inference/queries/SemanticQueryBuilderTests.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java index fe5fbc7fe0eb0..e5c6c7355a454 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java @@ -11,11 +11,9 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.FloatDocValuesField; import org.apache.lucene.document.TextField; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.KnnByteVectorQuery; import org.apache.lucene.search.KnnFloatVectorQuery; import org.apache.lucene.search.MatchNoDocsQuery; @@ -87,7 +85,6 @@ import static org.apache.lucene.search.BooleanClause.Occur.FILTER; import static org.apache.lucene.search.BooleanClause.Occur.MUST; -import static org.apache.lucene.search.BooleanClause.Occur.SHOULD; import static org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceConfig.DEFAULT_RESULTS_FIELD; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; @@ -423,7 +420,8 @@ private static SourceToParse 
buildSemanticTextFieldWithInferenceResults( } private static MinimalServiceSettings getModelSettingsForInferenceResultType( - InferenceResultType inferenceResultType, @Nullable DenseVectorFieldMapper.ElementType denseVectorElementType + InferenceResultType inferenceResultType, + @Nullable DenseVectorFieldMapper.ElementType denseVectorElementType ) { return switch (inferenceResultType) { case NONE -> null; From c1658672d60262ef3074acfb8a74bd8c22183975 Mon Sep 17 00:00:00 2001 From: "Mark J. Hoy" Date: Tue, 17 Jun 2025 21:10:29 -0400 Subject: [PATCH 26/37] cleanup mapper/builder/tests; index vers. in type still need to refactor / clean YAML tests --- .../vectors/SparseVectorFieldMapper.java | 74 +++---- .../vectors/SparseVectorFieldMapperTests.java | 184 ++++++++++++------ .../test/AbstractQueryTestCase.java | 2 +- .../queries/SemanticQueryBuilderTests.java | 8 +- 4 files changed, 166 insertions(+), 102 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index 4174c5fb6251e..4b1e6ecd62722 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -88,14 +88,12 @@ public class SparseVectorFieldMapper extends FieldMapper { public static final NodeFeature SPARSE_VECTOR_INDEX_OPTIONS_FEATURE = new NodeFeature("sparse_vector.index_options_supported"); - private final IndexVersion indexVersion; - private static SparseVectorFieldMapper toType(FieldMapper in) { return (SparseVectorFieldMapper) in; } public static class Builder extends FieldMapper.Builder { - private final IndexVersion indexVersion; + private final IndexVersion indexVersionCreated; private final Parameter stored = Parameter.storeParam(m -> toType(m).fieldType().isStored(), false); private final Parameter> meta = 
Parameter.metaParam(); @@ -109,9 +107,9 @@ public static class Builder extends FieldMapper.Builder { Objects::toString ).acceptsNull().setSerializerCheck(this::indexOptionsSerializerCheck); - public Builder(String name, @Nullable IndexVersion indexVersion) { + public Builder(String name, IndexVersion indexVersionCreated) { super(name); - this.indexVersion = indexVersion; + this.indexVersionCreated = indexVersionCreated; } public Builder setStored(boolean value) { @@ -127,27 +125,34 @@ protected Parameter[] getParameters() { @Override public SparseVectorFieldMapper build(MapperBuilderContext context) { IndexOptions builderIndexOptions = indexOptions.getValue(); - if (builderIndexOptions == null && indexVersion != null && indexVersionSupportsDefaultPruningConfig(indexVersion)) { - builderIndexOptions = new IndexOptions(true, new TokenPruningConfig()); + if (builderIndexOptions == null) { + builderIndexOptions = getDefaultIndexOptions(indexVersionCreated); } return new SparseVectorFieldMapper( leafName(), new SparseVectorFieldType( - indexVersion, + indexVersionCreated, context.buildFullName(leafName()), stored.getValue(), meta.getValue(), builderIndexOptions ), - builderParams(this, context), - indexVersion + builderParams(this, context) ); } + private IndexOptions getDefaultIndexOptions(IndexVersion indexVersion) { + return ( + indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION) || + indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0) + ) + ? 
IndexOptions.DEFAULT_PRUNING_INDEX_OPTIONS + : null; + } + private boolean indexOptionsSerializerCheck(boolean includeDefaults, boolean isConfigured, IndexOptions value) { - return (indexVersionSupportsDefaultPruningConfig(indexVersion) - && (includeDefaults || (value == null || IndexOptions.isDefaultOptions(value, indexVersion)) == false)); + return includeDefaults || (IndexOptions.isDefaultOptions(value, indexVersionCreated) == false); } } @@ -197,24 +202,23 @@ private static SparseVectorFieldMapper.IndexOptions parseIndexOptions(MappingPar }, notInMultiFields(CONTENT_TYPE)); public static final class SparseVectorFieldType extends MappedFieldType { + private final IndexVersion indexVersionCreated; private final IndexOptions indexOptions; - public SparseVectorFieldType(IndexVersion indexVersion, String name, boolean isStored, Map meta) { - this(indexVersion, name, isStored, meta, null); + public SparseVectorFieldType(IndexVersion indexVersionCreated, String name, boolean isStored, Map meta) { + this(indexVersionCreated, name, isStored, meta, null); } public SparseVectorFieldType( - IndexVersion indexVersion, + IndexVersion indexVersionCreated, String name, boolean isStored, Map meta, @Nullable SparseVectorFieldMapper.IndexOptions indexOptions ) { super(name, true, isStored, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); - this.indexOptions = indexOptions != null ? indexOptions - : SparseVectorFieldMapper.indexVersionSupportsDefaultPruningConfig(indexVersion) - ? 
new IndexOptions(true, new TokenPruningConfig()) - : null; + this.indexVersionCreated = indexVersionCreated; + this.indexOptions = indexOptions; } public IndexOptions getIndexOptions() { @@ -298,16 +302,18 @@ private static String indexedValueForSearch(Object value) { } return value.toString(); } + + public IndexVersion getIndexVersionCreated() { + return indexVersionCreated; + } } private SparseVectorFieldMapper( String simpleName, MappedFieldType mappedFieldType, - BuilderParams builderParams, - IndexVersion indexVersion + BuilderParams builderParams ) { super(simpleName, mappedFieldType, builderParams); - this.indexVersion = indexVersion; } @Override @@ -325,6 +331,7 @@ public Map indexAnalyzers() { @Override public FieldMapper.Builder getMergeBuilder() { + IndexVersion indexVersion = this.fieldType() != null ? this.fieldType().getIndexVersionCreated() : IndexVersion.current(); return new Builder(leafName(), indexVersion).init(this); } @@ -405,8 +412,13 @@ protected String contentType() { private static boolean indexVersionSupportsDefaultPruningConfig(IndexVersion indexVersion) { // default pruning for 9.1.0+ or 8.19.0+ is true for this index - return (indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION) - || indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0)); + return ( + indexVersion != null && + ( + indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION) || + indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0) + ) + ); } private static class SparseVectorValueFetcher implements ValueFetcher { @@ -522,6 +534,7 @@ public void reset() { public static class IndexOptions implements ToXContent { public static final ParseField PRUNE_FIELD_NAME = new ParseField("prune"); public static final ParseField PRUNING_CONFIG_FIELD_NAME = new ParseField("pruning_config"); + public static final IndexOptions DEFAULT_PRUNING_INDEX_OPTIONS 
= new IndexOptions(true, new TokenPruningConfig()); final Boolean prune; final TokenPruningConfig pruningConfig; @@ -544,16 +557,11 @@ public static class IndexOptions implements ToXContent { } public static boolean isDefaultOptions(IndexOptions indexOptions, IndexVersion indexVersion) { - if (indexVersionSupportsDefaultPruningConfig(indexVersion) == false) { - return indexOptions == null; - } - - if (indexOptions == null || indexOptions.prune == null || indexOptions.prune == false || indexOptions.pruningConfig == null) { - return false; - } + IndexOptions defaultIndexOptions = indexVersionSupportsDefaultPruningConfig(indexVersion) + ? DEFAULT_PRUNING_INDEX_OPTIONS + : null; - return (indexOptions.pruningConfig.getTokensFreqRatioThreshold() == TokenPruningConfig.DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD - && indexOptions.pruningConfig.getTokensWeightThreshold() == TokenPruningConfig.DEFAULT_TOKENS_WEIGHT_THRESHOLD); + return Objects.equals(indexOptions, defaultIndexOptions); } public Boolean getPrune() { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index 90f984545a9bb..f274c30aaf1dc 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -23,6 +23,8 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.core.CheckedConsumer; +import org.elasticsearch.core.CheckedRunnable; +import org.elasticsearch.core.CheckedSupplier; import org.elasticsearch.core.Nullable; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; @@ -51,7 +53,9 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Optional; +import static 
org.elasticsearch.index.IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT; import static org.elasticsearch.index.IndexVersions.UPGRADE_TO_LUCENE_10_0_0; import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.NEW_SPARSE_VECTOR_INDEX_VERSION; import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.PREVIOUS_SPARSE_VECTOR_INDEX_VERSION; @@ -81,8 +85,23 @@ protected void minimalMapping(XContentBuilder b) throws IOException { b.field("type", "sparse_vector"); } + protected void minimalFieldMappingPreviousIndexVersion(XContentBuilder b) throws IOException { + b.field("type", "sparse_vector"); + b.field("store", false); + + b.startObject("meta"); + b.endObject(); + + b.field("index_options", (Object)null); + } + protected void minimalMappingWithExplicitDefaults(XContentBuilder b) throws IOException { b.field("type", "sparse_vector"); + b.field("store", false); + + b.startObject("meta"); + b.endObject(); + b.startObject("index_options"); { b.field("prune", true); @@ -211,15 +230,60 @@ public void testDefaults() throws Exception { assertTrue(freq1 < freq2); } + private void buildDocForSparseVectorFieldMapping(XContentBuilder b, CheckedConsumer supplier) throws IOException { + b.startObject("_doc"); + { + b.startArray("dynamic_date_formats"); + { + b.value("strict_date_optional_time||epoch_millis"); + b.value("yyyy/MM/dd HH:mm:ss||yyyy/MM/dd||epoch_millis"); + } + b.endArray(); + + b.startArray("dynamic_templates"); + b.endArray(); + + b.field("date_detection", true); + b.field("numeric_detection", false); + + b.startObject("properties"); + { + b.startObject("field"); + + supplier.accept(b); + + b.endObject(); + } + b.endObject(); + } + b.endObject(); + }; + public void testDefaultsWithIncludeDefaults() throws Exception { XContentBuilder orig = JsonXContent.contentBuilder().startObject(); createMapperService(fieldMapping(this::minimalMapping)).documentMapper().mapping().toXContent(orig, INCLUDE_DEFAULTS); orig.endObject(); 
XContentBuilder withDefaults = JsonXContent.contentBuilder().startObject(); - createMapperService(fieldMapping(this::minimalMappingWithExplicitDefaults)).documentMapper() - .mapping() - .toXContent(withDefaults, INCLUDE_DEFAULTS); + buildDocForSparseVectorFieldMapping(withDefaults, this::minimalMappingWithExplicitDefaults); + withDefaults.endObject(); + + assertEquals(Strings.toString(withDefaults), Strings.toString(orig)); + } + + public void testDefaultsWithIncludeDefaultsOlderIndexVersion() throws Exception { + IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween( + random(), + UPGRADE_TO_LUCENE_10_0_0, + IndexVersionUtils.getPreviousVersion(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION) + ); + + XContentBuilder orig = JsonXContent.contentBuilder().startObject(); + createMapperService(indexVersion, fieldMapping(this::minimalMapping)).documentMapper().mapping().toXContent(orig, INCLUDE_DEFAULTS); + orig.endObject(); + + XContentBuilder withDefaults = JsonXContent.contentBuilder().startObject(); + buildDocForSparseVectorFieldMapping(withDefaults, this::minimalFieldMappingPreviousIndexVersion); withDefaults.endObject(); assertEquals(Strings.toString(withDefaults), Strings.toString(orig)); @@ -232,7 +296,7 @@ public void testMappingWithoutIndexOptionsUsesDefaults() throws Exception { IndexVersion preIndexOptionsVersion = IndexVersionUtils.randomVersionBetween( random(), UPGRADE_TO_LUCENE_10_0_0, - SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION + IndexVersionUtils.getPreviousVersion(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION) ); DocumentMapper previousMapper = createDocumentMapper(preIndexOptionsVersion, fieldMapping(this::minimalMapping)); assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), previousMapper.mappingSource().toString()); @@ -624,20 +688,17 @@ public void testTokensWeightThresholdCorrect() { private void withSearchExecutionContext(MapperService mapperService, CheckedConsumer consumer) throws IOException { - for (boolean store : 
new boolean[] { true, false }) { - var mapper = mapperService.documentMapper(); - try (Directory directory = newDirectory()) { - RandomIndexWriter iw = new RandomIndexWriter(random(), directory); - var sourceToParse = source(this::writeField); - ParsedDocument doc1 = mapper.parse(sourceToParse); - iw.addDocument(doc1.rootDoc()); - iw.close(); - - try (DirectoryReader reader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) { - LeafReader leafReader = getOnlyLeafReader(reader); - var searchContext = createSearchExecutionContext(mapperService, new IndexSearcher(leafReader)); - consumer.accept(searchContext); - } + var mapper = mapperService.documentMapper(); + try (Directory directory = newDirectory()) { + RandomIndexWriter iw = new RandomIndexWriter(random(), directory); + var sourceToParse = source(this::writeField); + ParsedDocument doc1 = mapper.parse(sourceToParse); + iw.addDocument(doc1.rootDoc()); + iw.close(); + + try (DirectoryReader reader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) { + var searchContext = createSearchExecutionContext(mapperService, new IndexSearcher(reader)); + consumer.accept(searchContext); } } } @@ -659,19 +720,19 @@ public void testTypeQueryFinalizationDefaultsCurrentVersion() throws Exception { MapperService mapperService = createMapperService(version, fieldMapping(this::minimalMapping)); // query should be pruned by default on newer index versions - performTypeQueryFinalizationTest(version, mapperService, null, null, null, true); + performTypeQueryFinalizationTest(mapperService, null, null, true); } public void testTypeQueryFinalizationDefaultsPreviousVersion() throws Exception { IndexVersion version = IndexVersionUtils.randomVersionBetween( random(), - IndexVersions.UPGRADE_TO_LUCENE_10_2_1, - IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT + UPGRADE_TO_LUCENE_10_0_0, + IndexVersionUtils.getPreviousVersion(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT) ); MapperService mapperService = 
createMapperService(version, fieldMapping(this::minimalMapping)); // query should _not_ be pruned by default on older index versions - performTypeQueryFinalizationTest(version, mapperService, null, null, null, false); + performTypeQueryFinalizationTest(mapperService, null, null, false); } public void testTypeQueryFinalizationWithIndexExplicit() throws Exception { @@ -680,9 +741,7 @@ public void testTypeQueryFinalizationWithIndexExplicit() throws Exception { // query should be pruned via explicit index options performTypeQueryFinalizationTest( - version, mapperService, - new SparseVectorFieldMapper.IndexOptions(true, new TokenPruningConfig()), null, null, true @@ -694,7 +753,7 @@ public void testTypeQueryFinalizationWithIndexExplicitDoNotPrune() throws Except MapperService mapperService = createMapperService(version, fieldMapping(this::mappingWithIndexOptionsPruneFalse)); // query should be pruned via explicit index options - performTypeQueryFinalizationTest(version, mapperService, new SparseVectorFieldMapper.IndexOptions(false, null), null, null, false); + performTypeQueryFinalizationTest(mapperService, null, null, false); } public void testTypeQueryFinalizationQueryOverridesPruning() throws Exception { @@ -703,9 +762,7 @@ public void testTypeQueryFinalizationQueryOverridesPruning() throws Exception { // query should still be pruned due to query builder setting it performTypeQueryFinalizationTest( - version, mapperService, - new SparseVectorFieldMapper.IndexOptions(false, null), true, new TokenPruningConfig(), true @@ -718,9 +775,7 @@ public void testTypeQueryFinalizationQueryOverridesPruningOff() throws Exception // query should not pruned due to query builder setting it performTypeQueryFinalizationTest( - version, mapperService, - new SparseVectorFieldMapper.IndexOptions(true, new TokenPruningConfig()), false, null, false @@ -728,21 +783,14 @@ public void testTypeQueryFinalizationQueryOverridesPruningOff() throws Exception } private void 
performTypeQueryFinalizationTest( - IndexVersion indexVersion, MapperService mapperService, - SparseVectorFieldMapper.IndexOptions indexOptions, @Nullable Boolean queryPrune, @Nullable TokenPruningConfig queryTokenPruningConfig, boolean queryShouldBePruned ) throws IOException { withSearchExecutionContext(mapperService, (context) -> { - SparseVectorFieldMapper.SparseVectorFieldType ft = new SparseVectorFieldMapper.SparseVectorFieldType( - indexVersion, - "field", - false, - Collections.emptyMap(), - indexOptions - ); + SparseVectorFieldMapper.SparseVectorFieldType ft = (SparseVectorFieldMapper.SparseVectorFieldType) mapperService + .fieldType("field"); Query finalizedQuery = ft.finalizeSparseVectorQuery(context, "field", QUERY_VECTORS, queryPrune, queryTokenPruningConfig); if (queryShouldBePruned) { @@ -777,47 +825,45 @@ private void runTestTypeQueryFinalization( boolean queryOverridesPruning, boolean queryOverrideExplicitFalse ) throws IOException { - IndexVersion version = usePreviousIndex - ? 
IndexVersionUtils.randomVersionBetween( - random(), - IndexVersions.UPGRADE_TO_LUCENE_10_2_1, - IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT - ) - : IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT; - + // get the index version of the test to use + // either a current version that supports index options, or + // a previous version that does now + IndexVersion version = getIndexVersionForTest(usePreviousIndex); + + // create our mapper service + // if we set explicitIndexOptionsDoNotPrune, the index_options (if present) + // will explicitly include "prune: false" MapperService mapperService = getMapperServiceForRandomizedFinalizationTest( version, useIndexOptionsDefaults, explicitIndexOptionsDoNotPrune ); - SparseVectorFieldMapper.IndexOptions indexOptions = getIndexOptionsQueryFinalization( - usePreviousIndex, - useIndexOptionsDefaults, - explicitIndexOptionsDoNotPrune - ); - + // check and see if the query should explicitly override the index_options Boolean shouldQueryPrune = queryOverridesPruning ? (queryOverrideExplicitFalse == false) : null; + // get the pruning configuration for the query if it's overriding TokenPruningConfig queryPruningConfig = queryOverridesPruning && queryOverrideExplicitFalse == false ? 
new TokenPruningConfig() : null; - boolean resultShouldBePruned = true; - if (queryOverridesPruning && queryOverrideExplicitFalse) { - resultShouldBePruned = false; - } else if (queryOverridesPruning == false && (usePreviousIndex || explicitIndexOptionsDoNotPrune)) { - resultShouldBePruned = false; - } + // our logic if the results should be pruned or not + // we should _not_ prune if any of the following: + // - the query explicitly overrides the options and `prune` is set to false + // - the query does not override the pruning options and: + // - either we are using a previous index version + // - or the index_options explicitly sets `prune` to false + boolean resultShouldNotBePruned = ( + (queryOverridesPruning && queryOverrideExplicitFalse) || + (queryOverridesPruning == false && (usePreviousIndex || explicitIndexOptionsDoNotPrune)) + ); try { performTypeQueryFinalizationTest( - version, mapperService, - indexOptions, shouldQueryPrune, queryPruningConfig, - resultShouldBePruned + resultShouldNotBePruned == false ); } catch (AssertionError e) { String message = "performTypeQueryFinalizationTest failed using parameters: " @@ -836,6 +882,20 @@ private void runTestTypeQueryFinalization( } + private IndexVersion getIndexVersionForTest(boolean usePreviousIndex) { + return usePreviousIndex + ? 
IndexVersionUtils.randomVersionBetween( + random(), + UPGRADE_TO_LUCENE_10_0_0, + IndexVersionUtils.getPreviousVersion(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT) + ) + : IndexVersionUtils.randomVersionBetween( + random(), + SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT, + IndexVersion.current() + ); + } + private SparseVectorFieldMapper.IndexOptions getIndexOptionsQueryFinalization( boolean usePreviousIndex, boolean useIndexOptionsDefaults, @@ -859,7 +919,7 @@ private MapperService getMapperServiceForRandomizedFinalizationTest( boolean useIndexOptionsDefaults, boolean explicitIndexOptionsDoNotPrune ) throws IOException { - if (useIndexOptionsDefaults) { + if (useIndexOptionsDefaults && explicitIndexOptionsDoNotPrune == false) { return createMapperService(indexVersion, fieldMapping(this::minimalMapping)); } diff --git a/test/framework/src/main/java/org/elasticsearch/test/AbstractQueryTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/AbstractQueryTestCase.java index dba46d716b643..be4443c84eb02 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/AbstractQueryTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/AbstractQueryTestCase.java @@ -999,7 +999,7 @@ public void close() throws IOException { } } - protected void initIndexWriter(RandomIndexWriter indexWriter) {} + protected void initIndexWriter(RandomIndexWriter indexWriter) throws IOException {} } public static class NullIndexReaderManager extends IndexReaderManager { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java index e5c6c7355a454..ac1c8b05b51a8 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java @@ -188,14 +188,10 @@ protected void initializeAdditionalMappings(MapperService mapperService) throws protected IndexReaderManager getIndexReaderManager() { return new IndexReaderManager() { @Override - protected void initIndexWriter(RandomIndexWriter indexWriter) { + protected void initIndexWriter(RandomIndexWriter indexWriter) throws IOException { Document document = new Document(); document.add(new TextField("semantic.inference.chunks.embeddings", "a b x y", Field.Store.NO)); - try { - indexWriter.addDocument(document); - } catch (IOException e) { - throw new RuntimeException(e); - } + indexWriter.addDocument(document); } }; } From 8ac4ebce3694b3f03796a3a50e6d52886664c9bb Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 18 Jun 2025 01:19:46 +0000 Subject: [PATCH 27/37] [CI] Auto commit changes from spotless --- .../vectors/SparseVectorFieldMapper.java | 30 +++----- .../vectors/SparseVectorFieldMapperTests.java | 68 ++++++------------- 2 files changed, 30 insertions(+), 68 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index 4b1e6ecd62722..5488212f444d3 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -143,12 +143,10 @@ public SparseVectorFieldMapper build(MapperBuilderContext context) { } private IndexOptions getDefaultIndexOptions(IndexVersion indexVersion) { - return ( - indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION) || - indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0) - ) - ? 
IndexOptions.DEFAULT_PRUNING_INDEX_OPTIONS - : null; + return (indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION) + || indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0)) + ? IndexOptions.DEFAULT_PRUNING_INDEX_OPTIONS + : null; } private boolean indexOptionsSerializerCheck(boolean includeDefaults, boolean isConfigured, IndexOptions value) { @@ -308,11 +306,7 @@ public IndexVersion getIndexVersionCreated() { } } - private SparseVectorFieldMapper( - String simpleName, - MappedFieldType mappedFieldType, - BuilderParams builderParams - ) { + private SparseVectorFieldMapper(String simpleName, MappedFieldType mappedFieldType, BuilderParams builderParams) { super(simpleName, mappedFieldType, builderParams); } @@ -412,13 +406,9 @@ protected String contentType() { private static boolean indexVersionSupportsDefaultPruningConfig(IndexVersion indexVersion) { // default pruning for 9.1.0+ or 8.19.0+ is true for this index - return ( - indexVersion != null && - ( - indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION) || - indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0) - ) - ); + return (indexVersion != null + && (indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION) + || indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0))); } private static class SparseVectorValueFetcher implements ValueFetcher { @@ -558,8 +548,8 @@ public static class IndexOptions implements ToXContent { public static boolean isDefaultOptions(IndexOptions indexOptions, IndexVersion indexVersion) { IndexOptions defaultIndexOptions = indexVersionSupportsDefaultPruningConfig(indexVersion) - ? DEFAULT_PRUNING_INDEX_OPTIONS - : null; + ? 
DEFAULT_PRUNING_INDEX_OPTIONS + : null; return Objects.equals(indexOptions, defaultIndexOptions); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index f274c30aaf1dc..03e32887de044 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -23,8 +23,6 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.core.CheckedConsumer; -import org.elasticsearch.core.CheckedRunnable; -import org.elasticsearch.core.CheckedSupplier; import org.elasticsearch.core.Nullable; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; @@ -49,11 +47,9 @@ import java.io.IOException; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Optional; import static org.elasticsearch.index.IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT; import static org.elasticsearch.index.IndexVersions.UPGRADE_TO_LUCENE_10_0_0; @@ -92,7 +88,7 @@ protected void minimalFieldMappingPreviousIndexVersion(XContentBuilder b) throws b.startObject("meta"); b.endObject(); - b.field("index_options", (Object)null); + b.field("index_options", (Object) null); } protected void minimalMappingWithExplicitDefaults(XContentBuilder b) throws IOException { @@ -230,7 +226,8 @@ public void testDefaults() throws Exception { assertTrue(freq1 < freq2); } - private void buildDocForSparseVectorFieldMapping(XContentBuilder b, CheckedConsumer supplier) throws IOException { + private void buildDocForSparseVectorFieldMapping(XContentBuilder b, CheckedConsumer supplier) + throws IOException { 
b.startObject("_doc"); { b.startArray("dynamic_date_formats"); @@ -740,12 +737,7 @@ public void testTypeQueryFinalizationWithIndexExplicit() throws Exception { MapperService mapperService = createMapperService(version, fieldMapping(this::minimalMapping)); // query should be pruned via explicit index options - performTypeQueryFinalizationTest( - mapperService, - null, - null, - true - ); + performTypeQueryFinalizationTest(mapperService, null, null, true); } public void testTypeQueryFinalizationWithIndexExplicitDoNotPrune() throws Exception { @@ -761,12 +753,7 @@ public void testTypeQueryFinalizationQueryOverridesPruning() throws Exception { MapperService mapperService = createMapperService(version, fieldMapping(this::mappingWithIndexOptionsPruneFalse)); // query should still be pruned due to query builder setting it - performTypeQueryFinalizationTest( - mapperService, - true, - new TokenPruningConfig(), - true - ); + performTypeQueryFinalizationTest(mapperService, true, new TokenPruningConfig(), true); } public void testTypeQueryFinalizationQueryOverridesPruningOff() throws Exception { @@ -774,12 +761,7 @@ public void testTypeQueryFinalizationQueryOverridesPruningOff() throws Exception MapperService mapperService = createMapperService(version, fieldMapping(this::mappingWithIndexOptionsPruneFalse)); // query should not pruned due to query builder setting it - performTypeQueryFinalizationTest( - mapperService, - false, - null, - false - ); + performTypeQueryFinalizationTest(mapperService, false, null, false); } private void performTypeQueryFinalizationTest( @@ -789,8 +771,9 @@ private void performTypeQueryFinalizationTest( boolean queryShouldBePruned ) throws IOException { withSearchExecutionContext(mapperService, (context) -> { - SparseVectorFieldMapper.SparseVectorFieldType ft = (SparseVectorFieldMapper.SparseVectorFieldType) mapperService - .fieldType("field"); + SparseVectorFieldMapper.SparseVectorFieldType ft = (SparseVectorFieldMapper.SparseVectorFieldType) 
mapperService.fieldType( + "field" + ); Query finalizedQuery = ft.finalizeSparseVectorQuery(context, "field", QUERY_VECTORS, queryPrune, queryTokenPruningConfig); if (queryShouldBePruned) { @@ -851,20 +834,13 @@ private void runTestTypeQueryFinalization( // we should _not_ prune if any of the following: // - the query explicitly overrides the options and `prune` is set to false // - the query does not override the pruning options and: - // - either we are using a previous index version - // - or the index_options explicitly sets `prune` to false - boolean resultShouldNotBePruned = ( - (queryOverridesPruning && queryOverrideExplicitFalse) || - (queryOverridesPruning == false && (usePreviousIndex || explicitIndexOptionsDoNotPrune)) - ); + // - either we are using a previous index version + // - or the index_options explicitly sets `prune` to false + boolean resultShouldNotBePruned = ((queryOverridesPruning && queryOverrideExplicitFalse) + || (queryOverridesPruning == false && (usePreviousIndex || explicitIndexOptionsDoNotPrune))); try { - performTypeQueryFinalizationTest( - mapperService, - shouldQueryPrune, - queryPruningConfig, - resultShouldNotBePruned == false - ); + performTypeQueryFinalizationTest(mapperService, shouldQueryPrune, queryPruningConfig, resultShouldNotBePruned == false); } catch (AssertionError e) { String message = "performTypeQueryFinalizationTest failed using parameters: " + "usePreviousIndex: " @@ -884,16 +860,12 @@ private void runTestTypeQueryFinalization( private IndexVersion getIndexVersionForTest(boolean usePreviousIndex) { return usePreviousIndex - ? IndexVersionUtils.randomVersionBetween( - random(), - UPGRADE_TO_LUCENE_10_0_0, - IndexVersionUtils.getPreviousVersion(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT) - ) - : IndexVersionUtils.randomVersionBetween( - random(), - SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT, - IndexVersion.current() - ); + ? 
IndexVersionUtils.randomVersionBetween( + random(), + UPGRADE_TO_LUCENE_10_0_0, + IndexVersionUtils.getPreviousVersion(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT) + ) + : IndexVersionUtils.randomVersionBetween(random(), SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT, IndexVersion.current()); } private SparseVectorFieldMapper.IndexOptions getIndexOptionsQueryFinalization( From 584030cb9c50c866c44fc8bc416134ceb6fb6bac Mon Sep 17 00:00:00 2001 From: "Mark J. Hoy" Date: Wed, 18 Jun 2025 09:25:58 -0400 Subject: [PATCH 28/37] cleanups to mapper tests for clarity --- .../vectors/SparseVectorFieldMapperTests.java | 118 ++++++++---------- 1 file changed, 55 insertions(+), 63 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index 03e32887de044..461d21e0ef378 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -703,11 +703,11 @@ private void withSearchExecutionContext(MapperService mapperService, CheckedCons public void testTypeQueryFinalizationWithRandomOptions() throws Exception { for (int i = 0; i < 20; i++) { runTestTypeQueryFinalization( - randomBoolean(), // usePreviousIndex - randomBoolean(), // useIndexOptionsDefaults - randomBoolean(), // explicitIndexOptionsDoNotPrune - randomBoolean(), // queryOverridesPruning - randomBoolean() // queryOverrideExplicitFalse + randomBoolean(), // useIndexVersionBeforeIndexOptions + randomBoolean(), // useMapperDefaultIndexOptions + randomBoolean(), // setMapperIndexOptionsPruneToFalse + randomBoolean(), // queryOverridesPruningConfig + randomBoolean() // queryOverridesPruneToBeFalse ); } } @@ -801,61 +801,62 @@ private void assertQueryHasClauseCount(Query query, int clauseCount) { assertThat(clauses.size(), 
equalTo(clauseCount)); } + /** + * Runs a test of the query finalization based on various parameters + * that provides + * @param useIndexVersionBeforeIndexOptions set to true to use a previous index version before mapper index_options + * @param useMapperDefaultIndexOptions set to false to use an explicit, non-default mapper index_options + * @param setMapperIndexOptionsPruneToFalse set to true to use prune:false in the mapper index_options + * @param queryOverridesPruningConfig set to true to designate the query will provide a pruning_config + * @param queryOverridesPruneToBeFalse if true and queryOverridesPruningConfig is true, the query will provide prune:false + * @throws IOException + */ private void runTestTypeQueryFinalization( - boolean usePreviousIndex, - boolean useIndexOptionsDefaults, - boolean explicitIndexOptionsDoNotPrune, - boolean queryOverridesPruning, - boolean queryOverrideExplicitFalse + boolean useIndexVersionBeforeIndexOptions, + boolean useMapperDefaultIndexOptions, + boolean setMapperIndexOptionsPruneToFalse, + boolean queryOverridesPruningConfig, + boolean queryOverridesPruneToBeFalse ) throws IOException { - // get the index version of the test to use - // either a current version that supports index options, or - // a previous version that does now - IndexVersion version = getIndexVersionForTest(usePreviousIndex); - - // create our mapper service - // if we set explicitIndexOptionsDoNotPrune, the index_options (if present) - // will explicitly include "prune: false" - MapperService mapperService = getMapperServiceForRandomizedFinalizationTest( - version, - useIndexOptionsDefaults, - explicitIndexOptionsDoNotPrune + MapperService mapperService = getMapperServiceForTest( + useIndexVersionBeforeIndexOptions, + useMapperDefaultIndexOptions, + setMapperIndexOptionsPruneToFalse ); // check and see if the query should explicitly override the index_options - Boolean shouldQueryPrune = queryOverridesPruning ? 
(queryOverrideExplicitFalse == false) : null; + Boolean shouldQueryPrune = queryOverridesPruningConfig ? (queryOverridesPruneToBeFalse == false) : null; // get the pruning configuration for the query if it's overriding - TokenPruningConfig queryPruningConfig = queryOverridesPruning && queryOverrideExplicitFalse == false - ? new TokenPruningConfig() - : null; + TokenPruningConfig queryPruningConfig = Boolean.TRUE.equals(shouldQueryPrune) ? new TokenPruningConfig() : null; // our logic if the results should be pruned or not // we should _not_ prune if any of the following: // - the query explicitly overrides the options and `prune` is set to false // - the query does not override the pruning options and: - // - either we are using a previous index version - // - or the index_options explicitly sets `prune` to false - boolean resultShouldNotBePruned = ((queryOverridesPruning && queryOverrideExplicitFalse) - || (queryOverridesPruning == false && (usePreviousIndex || explicitIndexOptionsDoNotPrune))); + // - either we are using a previous index version + // - or the index_options explicitly sets `prune` to false + boolean resultShouldNotBePruned = ( + (queryOverridesPruningConfig && queryOverridesPruneToBeFalse) || + (queryOverridesPruningConfig == false && (useIndexVersionBeforeIndexOptions || setMapperIndexOptionsPruneToFalse)) + ); try { performTypeQueryFinalizationTest(mapperService, shouldQueryPrune, queryPruningConfig, resultShouldNotBePruned == false); } catch (AssertionError e) { String message = "performTypeQueryFinalizationTest failed using parameters: " - + "usePreviousIndex: " - + usePreviousIndex - + ", useIndexOptionsDefaults: " - + useIndexOptionsDefaults - + ", explicitIndexOptionsDoNotPrune: " - + explicitIndexOptionsDoNotPrune - + ", queryOverridesPruning: " - + queryOverridesPruning - + ", queryOverrideExplicitFalse: " - + queryOverrideExplicitFalse; + + "useIndexVersionBeforeIndexOptions: " + + useIndexVersionBeforeIndexOptions + + ", 
useMapperDefaultIndexOptions: " + + useMapperDefaultIndexOptions + + ", setMapperIndexOptionsPruneToFalse: " + + setMapperIndexOptionsPruneToFalse + + ", queryOverridesPruningConfig: " + + queryOverridesPruningConfig + + ", queryOverridesPruneToBeFalse: " + + queryOverridesPruneToBeFalse; throw new AssertionError(message, e); } - } private IndexVersion getIndexVersionForTest(boolean usePreviousIndex) { @@ -868,38 +869,29 @@ private IndexVersion getIndexVersionForTest(boolean usePreviousIndex) { : IndexVersionUtils.randomVersionBetween(random(), SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT, IndexVersion.current()); } - private SparseVectorFieldMapper.IndexOptions getIndexOptionsQueryFinalization( + private MapperService getMapperServiceForTest( boolean usePreviousIndex, boolean useIndexOptionsDefaults, - boolean explicitIndexOptionsDoNotPrune - ) { - if (usePreviousIndex) { - return null; - } - - if (useIndexOptionsDefaults && explicitIndexOptionsDoNotPrune == false) { - return null; - } - - return explicitIndexOptionsDoNotPrune - ? 
new SparseVectorFieldMapper.IndexOptions(false, null) - : new SparseVectorFieldMapper.IndexOptions(true, new TokenPruningConfig()); - } + boolean explicitIndexOptionsDoNotPrune) throws IOException + { + // get the index version of the test to use + // either a current version that supports index options, or a previous version that does not + IndexVersion indexVersion = getIndexVersionForTest(usePreviousIndex); - private MapperService getMapperServiceForRandomizedFinalizationTest( - IndexVersion indexVersion, - boolean useIndexOptionsDefaults, - boolean explicitIndexOptionsDoNotPrune - ) throws IOException { - if (useIndexOptionsDefaults && explicitIndexOptionsDoNotPrune == false) { + // if it's using the old index, we always use the minimal mapping without index_options + if (usePreviousIndex) { return createMapperService(indexVersion, fieldMapping(this::minimalMapping)); } + // if we set explicitIndexOptionsDoNotPrune, the index_options (if present) will explicitly include "prune: false" if (explicitIndexOptionsDoNotPrune) { return createMapperService(indexVersion, fieldMapping(this::mappingWithIndexOptionsPruneFalse)); } - return createMapperService(indexVersion, fieldMapping(this::minimalMapping)); + // either return the default (minimal) mapping or one with an explicit pruning_config + return useIndexOptionsDefaults + ? 
createMapperService(indexVersion, fieldMapping(this::minimalMapping)) + : createMapperService(indexVersion, fieldMapping(this::minimalMappingWithExplicitIndexOptions)); } private static List QUERY_VECTORS = List.of( From 05ff647cee03e86d9fd72d7e0f2886705201711f Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 18 Jun 2025 13:35:25 +0000 Subject: [PATCH 29/37] [CI] Auto commit changes from spotless --- .../vectors/SparseVectorFieldMapperTests.java | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index 461d21e0ef378..a2a82be1da12c 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -834,12 +834,10 @@ private void runTestTypeQueryFinalization( // we should _not_ prune if any of the following: // - the query explicitly overrides the options and `prune` is set to false // - the query does not override the pruning options and: - // - either we are using a previous index version - // - or the index_options explicitly sets `prune` to false - boolean resultShouldNotBePruned = ( - (queryOverridesPruningConfig && queryOverridesPruneToBeFalse) || - (queryOverridesPruningConfig == false && (useIndexVersionBeforeIndexOptions || setMapperIndexOptionsPruneToFalse)) - ); + // - either we are using a previous index version + // - or the index_options explicitly sets `prune` to false + boolean resultShouldNotBePruned = ((queryOverridesPruningConfig && queryOverridesPruneToBeFalse) + || (queryOverridesPruningConfig == false && (useIndexVersionBeforeIndexOptions || setMapperIndexOptionsPruneToFalse))); try { performTypeQueryFinalizationTest(mapperService, shouldQueryPrune, queryPruningConfig, 
resultShouldNotBePruned == false); @@ -872,8 +870,8 @@ private IndexVersion getIndexVersionForTest(boolean usePreviousIndex) { private MapperService getMapperServiceForTest( boolean usePreviousIndex, boolean useIndexOptionsDefaults, - boolean explicitIndexOptionsDoNotPrune) throws IOException - { + boolean explicitIndexOptionsDoNotPrune + ) throws IOException { // get the index version of the test to use // either a current version that supports index options, or a previous version that does not IndexVersion indexVersion = getIndexVersionForTest(usePreviousIndex); From 1cab345f329c35137b716bb2f6779fc70f193d96 Mon Sep 17 00:00:00 2001 From: "Mark J. Hoy" Date: Wed, 18 Jun 2025 11:28:13 -0400 Subject: [PATCH 30/37] move feature into mappers; fix yaml tests --- .../index/mapper/MapperFeatures.java | 4 ++- .../xpack/core/XPackFeatures.java | 7 ----- .../test/multi_cluster/50_sparse_vector.yml | 26 +++++++++++++------ .../test/remote_cluster/50_sparse_vector.yml | 26 +++++++++++++------ .../test/ml/sparse_vector_search.yml | 19 ++++++++++---- 5 files changed, 53 insertions(+), 29 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index cd56f281aa953..8817acf3b8e64 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -17,6 +17,7 @@ import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ; +import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.SPARSE_VECTOR_INDEX_OPTIONS_FEATURE; /** * Spec for mapper-related features. 
@@ -74,7 +75,8 @@ public Set getTestFeatures() { USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ, IVF_FORMAT_CLUSTER_FEATURE, IVF_NESTED_SUPPORT, - SEARCH_LOAD_PER_SHARD + SEARCH_LOAD_PER_SHARD, + SPARSE_VECTOR_INDEX_OPTIONS_FEATURE ); } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackFeatures.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackFeatures.java index ad0f38d975b86..42824a553d2bd 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackFeatures.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackFeatures.java @@ -12,8 +12,6 @@ import java.util.Set; -import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.SPARSE_VECTOR_INDEX_OPTIONS_FEATURE; - /** * Provides the XPack features that this version of the code supports */ @@ -22,9 +20,4 @@ public class XPackFeatures implements FeatureSpecification { public Set getFeatures() { return Set.of(); } - - @Override - public Set getTestFeatures() { - return Set.of(SPARSE_VECTOR_INDEX_OPTIONS_FEATURE); - } } diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml index 92edc8c3bb5b7..509e593d0183f 100644 --- a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml +++ b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml @@ -252,15 +252,12 @@ teardown: - do: headers: - Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials Content-Type: application/json indices.create: index: sparse_vector_pruning_test body: mappings: properties: - text: - type: text ml.tokens: type: sparse_vector @@ -268,15 +265,28 @@ teardown: - do: 
headers: - Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials Content-Type: application/json - indices.get_mapping: + indices.get_field_mapping: index: sparse_vector_pruning_test + fields: ml.tokens + include_defaults: true # the index_options with pruning defaults will be serialized here explicitly - - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.prune: true } - - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_freq_ratio_threshold: 5.0 } - - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_weight_threshold: 0.4 } + - match: { sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.prune: true } + - match: { sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.pruning_config.tokens_freq_ratio_threshold: 5.0 } + - match: { sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.pruning_config.tokens_weight_threshold: 0.4 } + + - do: + headers: + Content-Type: application/json + indices.get_field_mapping: + index: sparse_vector_pruning_test + fields: ml.tokens + + - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.prune + - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.pruning_config.tokens_freq_ratio_threshold + - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.pruning_config.tokens_weight_threshold + - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options --- "Check sparse_vector token pruning index_options prune missing do not allow config": diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml 
b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml index 84c1112a66cae..6b1a143f8de47 100644 --- a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml +++ b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml @@ -250,15 +250,12 @@ teardown: - do: headers: - Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials Content-Type: application/json indices.create: index: sparse_vector_pruning_test body: mappings: properties: - text: - type: text ml.tokens: type: sparse_vector @@ -266,15 +263,28 @@ teardown: - do: headers: - Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials Content-Type: application/json - indices.get_mapping: + indices.get_field_mapping: index: sparse_vector_pruning_test + fields: ml.tokens + include_defaults: true # the index_options with pruning defaults will be serialized here explicitly - - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.prune: true } - - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_freq_ratio_threshold: 5.0 } - - match: { sparse_vector_pruning_test.mappings.properties.ml.properties.tokens.index_options.pruning_config.tokens_weight_threshold: 0.4 } + - match: { sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.prune: true } + - match: { sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.pruning_config.tokens_freq_ratio_threshold: 5.0 } + - match: { sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.pruning_config.tokens_weight_threshold: 0.4 } + + - do: + headers: + Content-Type: application/json + indices.get_field_mapping: + index: 
sparse_vector_pruning_test + fields: ml.tokens + + - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.prune + - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.pruning_config.tokens_freq_ratio_threshold + - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.pruning_config.tokens_weight_threshold + - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options --- "Check sparse_vector token pruning index_options prune missing do not allow config": diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml index 846ba3289f902..9401b481d7748 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml @@ -585,8 +585,6 @@ teardown: body: mappings: properties: - text: - type: text ml.tokens: type: sparse_vector @@ -601,10 +599,21 @@ teardown: include_defaults: true # the index_options with pruning defaults will be serialized here explicitly - - match: { sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.prune: true } - - match: { sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.pruning_config.tokens_freq_ratio_threshold: 5.0 } - - match: { sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.pruning_config.tokens_weight_threshold: 0.4 } + - match: { sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.prune: true } + - match: { sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.pruning_config.tokens_freq_ratio_threshold: 5.0 } + - match: { sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.pruning_config.tokens_weight_threshold: 
0.4 } + + - do: + headers: + Content-Type: application/json + indices.get_field_mapping: + index: sparse_vector_pruning_test + fields: ml.tokens + - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.prune + - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.pruning_config.tokens_freq_ratio_threshold + - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.pruning_config.tokens_weight_threshold + - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options --- "Check sparse_vector token pruning index_options prune missing do not allow config": From 71af3310f671e80611e4220559e3bce30dd800d0 Mon Sep 17 00:00:00 2001 From: "Mark J. Hoy" Date: Wed, 18 Jun 2025 11:58:42 -0400 Subject: [PATCH 31/37] cleanups; add comments; remove redundant test --- .../vectors/SparseVectorFieldMapper.java | 12 ++---- .../vectors/SparseVectorFieldMapperTests.java | 37 ++++++++++++------- .../vectors/SparseVectorFieldTypeTests.java | 8 +++- .../queries/SemanticQueryBuilderTests.java | 5 ++- 4 files changed, 36 insertions(+), 26 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index 5488212f444d3..98c00dc3c2031 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -300,10 +300,6 @@ private static String indexedValueForSearch(Object value) { } return value.toString(); } - - public IndexVersion getIndexVersionCreated() { - return indexVersionCreated; - } } private SparseVectorFieldMapper(String simpleName, MappedFieldType mappedFieldType, BuilderParams builderParams) { @@ -325,8 +321,7 @@ public Map indexAnalyzers() { @Override public FieldMapper.Builder 
getMergeBuilder() { - IndexVersion indexVersion = this.fieldType() != null ? this.fieldType().getIndexVersionCreated() : IndexVersion.current(); - return new Builder(leafName(), indexVersion).init(this); + return new Builder(leafName(), this.fieldType().indexVersionCreated).init(this); } @Override @@ -406,9 +401,8 @@ protected String contentType() { private static boolean indexVersionSupportsDefaultPruningConfig(IndexVersion indexVersion) { // default pruning for 9.1.0+ or 8.19.0+ is true for this index - return (indexVersion != null - && (indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION) - || indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0))); + return (indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION) + || indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0)); } private static class SparseVectorValueFetcher implements ValueFetcher { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index a2a82be1da12c..e360eb48b49ea 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -38,6 +38,7 @@ import org.elasticsearch.search.lookup.Source; import org.elasticsearch.search.vectors.SparseVectorQueryWrapper; import org.elasticsearch.test.index.IndexVersionUtils; +import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParseException; import org.elasticsearch.xcontent.json.JsonXContent; @@ -88,6 +89,9 @@ protected void minimalFieldMappingPreviousIndexVersion(XContentBuilder b) throws b.startObject("meta"); b.endObject(); + // note that 
internally, this will have a `index_options: null` field, + // but when serialized back to the client, this field will be pruned + // the YAML Rest tests checks for this b.field("index_options", (Object) null); } @@ -256,7 +260,7 @@ private void buildDocForSparseVectorFieldMapping(XContentBuilder b, CheckedConsu b.endObject(); }; - public void testDefaultsWithIncludeDefaults() throws Exception { + public void testDefaultsWithAndWithoutIncludeDefaults() throws Exception { XContentBuilder orig = JsonXContent.contentBuilder().startObject(); createMapperService(fieldMapping(this::minimalMapping)).documentMapper().mapping().toXContent(orig, INCLUDE_DEFAULTS); orig.endObject(); @@ -266,9 +270,16 @@ public void testDefaultsWithIncludeDefaults() throws Exception { withDefaults.endObject(); assertEquals(Strings.toString(withDefaults), Strings.toString(orig)); + + XContentBuilder origWithoutDefaults = JsonXContent.contentBuilder().startObject(); + createMapperService(fieldMapping(this::minimalMapping)) + .documentMapper().mapping().toXContent(origWithoutDefaults, ToXContent.EMPTY_PARAMS); + origWithoutDefaults.endObject(); + + assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), Strings.toString(origWithoutDefaults)); } - public void testDefaultsWithIncludeDefaultsOlderIndexVersion() throws Exception { + public void testDefaultsWithAndWithoutIncludeDefaultsOlderIndexVersion() throws Exception { IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween( random(), UPGRADE_TO_LUCENE_10_0_0, @@ -284,19 +295,17 @@ public void testDefaultsWithIncludeDefaultsOlderIndexVersion() throws Exception withDefaults.endObject(); assertEquals(Strings.toString(withDefaults), Strings.toString(orig)); - } - public void testMappingWithoutIndexOptionsUsesDefaults() throws Exception { - DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); - assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString()); + 
XContentBuilder origWithoutDefaults = JsonXContent.contentBuilder().startObject(); + createMapperService(indexVersion, fieldMapping(this::minimalMapping)) + .documentMapper().mapping().toXContent(origWithoutDefaults, INCLUDE_DEFAULTS); + origWithoutDefaults.endObject(); - IndexVersion preIndexOptionsVersion = IndexVersionUtils.randomVersionBetween( - random(), - UPGRADE_TO_LUCENE_10_0_0, - IndexVersionUtils.getPreviousVersion(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION) - ); - DocumentMapper previousMapper = createDocumentMapper(preIndexOptionsVersion, fieldMapping(this::minimalMapping)); - assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), previousMapper.mappingSource().toString()); + XContentBuilder withoutDefaults = JsonXContent.contentBuilder().startObject(); + buildDocForSparseVectorFieldMapping(withoutDefaults, this::minimalFieldMappingPreviousIndexVersion); + withoutDefaults.endObject(); + + assertEquals(Strings.toString(withoutDefaults), Strings.toString(origWithoutDefaults)); } public void testMappingWithExplicitIndexOptions() throws Exception { @@ -694,7 +703,7 @@ private void withSearchExecutionContext(MapperService mapperService, CheckedCons iw.close(); try (DirectoryReader reader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) { - var searchContext = createSearchExecutionContext(mapperService, new IndexSearcher(reader)); + var searchContext = createSearchExecutionContext(mapperService, newSearcher(reader)); consumer.accept(searchContext); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java index ecf0936be28b2..7d7879d9dd41e 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java @@ -10,23 +10,27 @@ package 
org.elasticsearch.index.mapper.vectors; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.mapper.FieldTypeTestCase; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.test.index.IndexVersionUtils; import java.util.Collections; public class SparseVectorFieldTypeTests extends FieldTypeTestCase { public void testDocValuesDisabled() { - IndexVersion indexVersion = IndexVersion.current(); + IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween(random(), + IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT, IndexVersion.current()); MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType(indexVersion, "field", false, Collections.emptyMap()); assertFalse(fieldType.hasDocValues()); expectThrows(IllegalArgumentException.class, () -> fieldType.fielddataBuilder(FieldDataContext.noRuntimeFields("test"))); } public void testIsNotAggregatable() { - IndexVersion indexVersion = IndexVersion.current(); + IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween(random(), + IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT, IndexVersion.current()); MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType(indexVersion, "field", false, Collections.emptyMap()); assertFalse(fieldType.isAggregatable()); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java index ac1c8b05b51a8..a7eda7112723e 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java @@ -102,7 +102,6 @@ public class 
SemanticQueryBuilderTests extends AbstractQueryTestCase Date: Wed, 18 Jun 2025 16:11:46 +0000 Subject: [PATCH 32/37] [CI] Auto commit changes from spotless --- .../mapper/vectors/SparseVectorFieldMapper.java | 2 +- .../vectors/SparseVectorFieldMapperTests.java | 10 ++++++---- .../mapper/vectors/SparseVectorFieldTypeTests.java | 14 ++++++++++---- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index 98c00dc3c2031..a9733e83c8be3 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -402,7 +402,7 @@ protected String contentType() { private static boolean indexVersionSupportsDefaultPruningConfig(IndexVersion indexVersion) { // default pruning for 9.1.0+ or 8.19.0+ is true for this index return (indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION) - || indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0)); + || indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0)); } private static class SparseVectorValueFetcher implements ValueFetcher { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index e360eb48b49ea..375048dd1771a 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -272,8 +272,9 @@ public void testDefaultsWithAndWithoutIncludeDefaults() throws Exception { 
assertEquals(Strings.toString(withDefaults), Strings.toString(orig)); XContentBuilder origWithoutDefaults = JsonXContent.contentBuilder().startObject(); - createMapperService(fieldMapping(this::minimalMapping)) - .documentMapper().mapping().toXContent(origWithoutDefaults, ToXContent.EMPTY_PARAMS); + createMapperService(fieldMapping(this::minimalMapping)).documentMapper() + .mapping() + .toXContent(origWithoutDefaults, ToXContent.EMPTY_PARAMS); origWithoutDefaults.endObject(); assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), Strings.toString(origWithoutDefaults)); @@ -297,8 +298,9 @@ public void testDefaultsWithAndWithoutIncludeDefaultsOlderIndexVersion() throws assertEquals(Strings.toString(withDefaults), Strings.toString(orig)); XContentBuilder origWithoutDefaults = JsonXContent.contentBuilder().startObject(); - createMapperService(indexVersion, fieldMapping(this::minimalMapping)) - .documentMapper().mapping().toXContent(origWithoutDefaults, INCLUDE_DEFAULTS); + createMapperService(indexVersion, fieldMapping(this::minimalMapping)).documentMapper() + .mapping() + .toXContent(origWithoutDefaults, INCLUDE_DEFAULTS); origWithoutDefaults.endObject(); XContentBuilder withoutDefaults = JsonXContent.contentBuilder().startObject(); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java index 7d7879d9dd41e..c8e96797d6838 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java @@ -21,16 +21,22 @@ public class SparseVectorFieldTypeTests extends FieldTypeTestCase { public void testDocValuesDisabled() { - IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween(random(), - IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT, IndexVersion.current()); + 
IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween( + random(), + IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT, + IndexVersion.current() + ); MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType(indexVersion, "field", false, Collections.emptyMap()); assertFalse(fieldType.hasDocValues()); expectThrows(IllegalArgumentException.class, () -> fieldType.fielddataBuilder(FieldDataContext.noRuntimeFields("test"))); } public void testIsNotAggregatable() { - IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween(random(), - IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT, IndexVersion.current()); + IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween( + random(), + IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT, + IndexVersion.current() + ); MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType(indexVersion, "field", false, Collections.emptyMap()); assertFalse(fieldType.isAggregatable()); } From 2d5bfb7feec9df71b170f785aa6ed4c657cd5ee9 Mon Sep 17 00:00:00 2001 From: "Mark J. 
Hoy" Date: Wed, 18 Jun 2025 12:16:42 -0400 Subject: [PATCH 33/37] escape more periods in the YAML tests --- .../rest-api-spec/test/multi_cluster/50_sparse_vector.yml | 8 ++++---- .../test/remote_cluster/50_sparse_vector.yml | 8 ++++---- .../rest-api-spec/test/ml/sparse_vector_search.yml | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml index 509e593d0183f..46db203a52e7b 100644 --- a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml +++ b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml @@ -283,10 +283,10 @@ teardown: index: sparse_vector_pruning_test fields: ml.tokens - - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.prune - - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.pruning_config.tokens_freq_ratio_threshold - - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.pruning_config.tokens_weight_threshold - - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options + - not_exists: sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.prune + - not_exists: sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.pruning_config.tokens_freq_ratio_threshold + - not_exists: sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.pruning_config.tokens_weight_threshold + - not_exists: sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options --- "Check sparse_vector token pruning index_options prune missing do not 
allow config": diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml index 6b1a143f8de47..2445f5eb38173 100644 --- a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml +++ b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml @@ -281,10 +281,10 @@ teardown: index: sparse_vector_pruning_test fields: ml.tokens - - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.prune - - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.pruning_config.tokens_freq_ratio_threshold - - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.pruning_config.tokens_weight_threshold - - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options + - not_exists: sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.prune + - not_exists: sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.pruning_config.tokens_freq_ratio_threshold + - not_exists: sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.pruning_config.tokens_weight_threshold + - not_exists: sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options --- "Check sparse_vector token pruning index_options prune missing do not allow config": diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml index 9401b481d7748..6f83b3faf9ee2 100644 --- 
a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml @@ -610,10 +610,10 @@ teardown: index: sparse_vector_pruning_test fields: ml.tokens - - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.prune - - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.pruning_config.tokens_freq_ratio_threshold - - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options.pruning_config.tokens_weight_threshold - - not_exists: sparse_vector_pruning_test.mappings.ml.tokens.mapping.tokens.index_options + - not_exists: sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.prune + - not_exists: sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.pruning_config.tokens_freq_ratio_threshold + - not_exists: sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options.pruning_config.tokens_weight_threshold + - not_exists: sparse_vector_pruning_test.mappings.ml\.tokens.mapping.tokens.index_options --- "Check sparse_vector token pruning index_options prune missing do not allow config": From e9f7c408dbe9a2b2b23f3636aa9825898ab2b7c3 Mon Sep 17 00:00:00 2001 From: "Mark J. 
Hoy" Date: Wed, 18 Jun 2025 14:55:23 -0400 Subject: [PATCH 34/37] cleanup mapper and type tests --- .../vectors/SparseVectorFieldMapperTests.java | 72 ++++++++----------- .../vectors/SparseVectorFieldTypeTests.java | 4 +- 2 files changed, 30 insertions(+), 46 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index 375048dd1771a..a80cc376987a9 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -21,6 +21,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.Nullable; @@ -41,6 +42,7 @@ import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParseException; +import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xcontent.json.JsonXContent; import org.hamcrest.Matchers; import org.junit.AssumptionViolatedException; @@ -57,6 +59,7 @@ import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.NEW_SPARSE_VECTOR_INDEX_VERSION; import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.PREVIOUS_SPARSE_VECTOR_INDEX_VERSION; import static org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertToXContentEquivalent; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; import static 
org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -230,54 +233,31 @@ public void testDefaults() throws Exception { assertTrue(freq1 < freq2); } - private void buildDocForSparseVectorFieldMapping(XContentBuilder b, CheckedConsumer supplier) - throws IOException { - b.startObject("_doc"); - { - b.startArray("dynamic_date_formats"); - { - b.value("strict_date_optional_time||epoch_millis"); - b.value("yyyy/MM/dd HH:mm:ss||yyyy/MM/dd||epoch_millis"); - } - b.endArray(); - - b.startArray("dynamic_templates"); - b.endArray(); - - b.field("date_detection", true); - b.field("numeric_detection", false); - - b.startObject("properties"); - { - b.startObject("field"); - - supplier.accept(b); - - b.endObject(); - } - b.endObject(); - } - b.endObject(); - }; - public void testDefaultsWithAndWithoutIncludeDefaults() throws Exception { XContentBuilder orig = JsonXContent.contentBuilder().startObject(); - createMapperService(fieldMapping(this::minimalMapping)).documentMapper().mapping().toXContent(orig, INCLUDE_DEFAULTS); + createMapperService(fieldMapping(this::minimalMapping)).mappingLookup().getMapper("field").toXContent(orig, INCLUDE_DEFAULTS); orig.endObject(); XContentBuilder withDefaults = JsonXContent.contentBuilder().startObject(); - buildDocForSparseVectorFieldMapping(withDefaults, this::minimalMappingWithExplicitDefaults); + withDefaults.startObject("field"); + minimalMappingWithExplicitDefaults(withDefaults); + withDefaults.endObject(); withDefaults.endObject(); - assertEquals(Strings.toString(withDefaults), Strings.toString(orig)); + assertToXContentEquivalent(BytesReference.bytes(withDefaults), BytesReference.bytes(orig), XContentType.JSON); XContentBuilder origWithoutDefaults = JsonXContent.contentBuilder().startObject(); - createMapperService(fieldMapping(this::minimalMapping)).documentMapper() - .mapping() + createMapperService(fieldMapping(this::minimalMapping)).mappingLookup().getMapper("field") .toXContent(origWithoutDefaults, 
ToXContent.EMPTY_PARAMS); origWithoutDefaults.endObject(); - assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), Strings.toString(origWithoutDefaults)); + XContentBuilder withoutDefaults = JsonXContent.contentBuilder().startObject(); + withoutDefaults.startObject("field"); + minimalMapping(withoutDefaults); + withoutDefaults.endObject(); + withoutDefaults.endObject(); + + assertToXContentEquivalent(BytesReference.bytes(withoutDefaults), BytesReference.bytes(origWithoutDefaults), XContentType.JSON); } public void testDefaultsWithAndWithoutIncludeDefaultsOlderIndexVersion() throws Exception { @@ -288,26 +268,30 @@ public void testDefaultsWithAndWithoutIncludeDefaultsOlderIndexVersion() throws ); XContentBuilder orig = JsonXContent.contentBuilder().startObject(); - createMapperService(indexVersion, fieldMapping(this::minimalMapping)).documentMapper().mapping().toXContent(orig, INCLUDE_DEFAULTS); + createMapperService(indexVersion, fieldMapping(this::minimalMapping)).mappingLookup().getMapper("field") + .toXContent(orig, INCLUDE_DEFAULTS); orig.endObject(); XContentBuilder withDefaults = JsonXContent.contentBuilder().startObject(); - buildDocForSparseVectorFieldMapping(withDefaults, this::minimalFieldMappingPreviousIndexVersion); + withDefaults.startObject("field"); + minimalFieldMappingPreviousIndexVersion(withDefaults); + withDefaults.endObject(); withDefaults.endObject(); - assertEquals(Strings.toString(withDefaults), Strings.toString(orig)); + assertToXContentEquivalent(BytesReference.bytes(withDefaults), BytesReference.bytes(orig), XContentType.JSON); XContentBuilder origWithoutDefaults = JsonXContent.contentBuilder().startObject(); - createMapperService(indexVersion, fieldMapping(this::minimalMapping)).documentMapper() - .mapping() - .toXContent(origWithoutDefaults, INCLUDE_DEFAULTS); + createMapperService(indexVersion, fieldMapping(this::minimalMapping)).mappingLookup().getMapper("field") + .toXContent(origWithoutDefaults, 
ToXContent.EMPTY_PARAMS); origWithoutDefaults.endObject(); XContentBuilder withoutDefaults = JsonXContent.contentBuilder().startObject(); - buildDocForSparseVectorFieldMapping(withoutDefaults, this::minimalFieldMappingPreviousIndexVersion); + withoutDefaults.startObject("field"); + minimalMapping(withoutDefaults); + withoutDefaults.endObject(); withoutDefaults.endObject(); - assertEquals(Strings.toString(withoutDefaults), Strings.toString(origWithoutDefaults)); + assertToXContentEquivalent(BytesReference.bytes(withoutDefaults), BytesReference.bytes(origWithoutDefaults), XContentType.JSON); } public void testMappingWithExplicitIndexOptions() throws Exception { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java index c8e96797d6838..0503204886abb 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java @@ -23,7 +23,7 @@ public class SparseVectorFieldTypeTests extends FieldTypeTestCase { public void testDocValuesDisabled() { IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween( random(), - IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT, + IndexVersions.NEW_SPARSE_VECTOR, IndexVersion.current() ); MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType(indexVersion, "field", false, Collections.emptyMap()); @@ -34,7 +34,7 @@ public void testDocValuesDisabled() { public void testIsNotAggregatable() { IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween( random(), - IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT, + IndexVersions.NEW_SPARSE_VECTOR, IndexVersion.current() ); MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType(indexVersion, "field", false, Collections.emptyMap()); From 
b30377cf5e4b5d0fe1f570918c7a802767277544 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 18 Jun 2025 19:03:53 +0000 Subject: [PATCH 35/37] [CI] Auto commit changes from spotless --- .../mapper/vectors/SparseVectorFieldMapperTests.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index a80cc376987a9..7d45684f52136 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -247,7 +247,8 @@ public void testDefaultsWithAndWithoutIncludeDefaults() throws Exception { assertToXContentEquivalent(BytesReference.bytes(withDefaults), BytesReference.bytes(orig), XContentType.JSON); XContentBuilder origWithoutDefaults = JsonXContent.contentBuilder().startObject(); - createMapperService(fieldMapping(this::minimalMapping)).mappingLookup().getMapper("field") + createMapperService(fieldMapping(this::minimalMapping)).mappingLookup() + .getMapper("field") .toXContent(origWithoutDefaults, ToXContent.EMPTY_PARAMS); origWithoutDefaults.endObject(); @@ -268,7 +269,8 @@ public void testDefaultsWithAndWithoutIncludeDefaultsOlderIndexVersion() throws ); XContentBuilder orig = JsonXContent.contentBuilder().startObject(); - createMapperService(indexVersion, fieldMapping(this::minimalMapping)).mappingLookup().getMapper("field") + createMapperService(indexVersion, fieldMapping(this::minimalMapping)).mappingLookup() + .getMapper("field") .toXContent(orig, INCLUDE_DEFAULTS); orig.endObject(); @@ -281,7 +283,8 @@ public void testDefaultsWithAndWithoutIncludeDefaultsOlderIndexVersion() throws assertToXContentEquivalent(BytesReference.bytes(withDefaults), BytesReference.bytes(orig), XContentType.JSON); XContentBuilder 
origWithoutDefaults = JsonXContent.contentBuilder().startObject(); - createMapperService(indexVersion, fieldMapping(this::minimalMapping)).mappingLookup().getMapper("field") + createMapperService(indexVersion, fieldMapping(this::minimalMapping)).mappingLookup() + .getMapper("field") .toXContent(origWithoutDefaults, ToXContent.EMPTY_PARAMS); origWithoutDefaults.endObject(); From 5895490120683d2bdb1a1f1c0aaab064b548b93e Mon Sep 17 00:00:00 2001 From: "Mark J. Hoy" Date: Wed, 18 Jun 2025 15:15:14 -0400 Subject: [PATCH 36/37] rename mapping for previous index test --- .../index/mapper/vectors/SparseVectorFieldMapperTests.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index 7d45684f52136..3ceab2cf204c7 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -85,16 +85,13 @@ protected void minimalMapping(XContentBuilder b) throws IOException { b.field("type", "sparse_vector"); } - protected void minimalFieldMappingPreviousIndexVersion(XContentBuilder b) throws IOException { + protected void minimalFieldMappingPreviousIndexDefaultsIncluded(XContentBuilder b) throws IOException { b.field("type", "sparse_vector"); b.field("store", false); b.startObject("meta"); b.endObject(); - // note that internally, this will have a `index_options: null` field, - // but when serialized back to the client, this field will be pruned - // the YAML Rest tests checks for this b.field("index_options", (Object) null); } @@ -276,7 +273,7 @@ public void testDefaultsWithAndWithoutIncludeDefaultsOlderIndexVersion() throws XContentBuilder withDefaults = JsonXContent.contentBuilder().startObject(); withDefaults.startObject("field"); 
- minimalFieldMappingPreviousIndexVersion(withDefaults); + minimalFieldMappingPreviousIndexDefaultsIncluded(withDefaults); withDefaults.endObject(); withDefaults.endObject(); From 862357339f2735a490e1ad3084bf366f77194f59 Mon Sep 17 00:00:00 2001 From: "Mark J. Hoy" Date: Mon, 23 Jun 2025 11:42:53 -0400 Subject: [PATCH 37/37] set explicit number of shards for yaml test --- .../rest-api-spec/test/multi_cluster/50_sparse_vector.yml | 2 ++ .../rest-api-spec/test/remote_cluster/50_sparse_vector.yml | 2 ++ .../resources/rest-api-spec/test/ml/sparse_vector_search.yml | 2 ++ 3 files changed, 6 insertions(+) diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml index 46db203a52e7b..c3255467ff443 100644 --- a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml +++ b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/50_sparse_vector.yml @@ -582,6 +582,8 @@ teardown: indices.create: index: test-sparse-vector-pruning-default body: + settings: + number_of_shards: 1 mappings: properties: content_embedding: diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml index 2445f5eb38173..3014883e5b42a 100644 --- a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml +++ b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/50_sparse_vector.yml @@ -580,6 +580,8 @@ teardown: indices.create: index: 
test-sparse-vector-pruning-default body: + settings: + number_of_shards: 1 mappings: properties: content_embedding: diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml index 6f83b3faf9ee2..2d440f1cf5987 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml @@ -905,6 +905,8 @@ teardown: indices.create: index: test-sparse-vector-pruning-default body: + settings: + number_of_shards: 1 mappings: properties: content_embedding: