Skip to content

Commit 64d6344

Browse files
Merge branch 'main' into 2025/07/28/write-load-decider
2 parents 6d85655 + 6ca5a18 commit 64d6344

File tree

67 files changed

+1663
-278
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+1663
-278
lines changed

docs/changelog/131058.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 131058
2+
summary: Adds sparse vector index options settings to semantic_text field
3+
area: Search
4+
type: enhancement
5+
issues: []

docs/reference/elasticsearch/mapping-reference/semantic-text.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -156,9 +156,11 @@ to create the endpoint. If not specified, the {{infer}} endpoint defined by
156156

157157
`index_options` {applies_to}`stack: ga 9.1`
158158
: (Optional, object) Specifies the index options to override default values
159-
for the field. Currently, `dense_vector` index options are supported.
160-
For text embeddings, `index_options` may match any allowed
161-
[dense_vector index options](/reference/elasticsearch/mapping-reference/dense-vector.md#dense-vector-index-options).
159+
for the field. Currently, `dense_vector` and `sparse_vector` index options are supported.
160+
For text embeddings, `index_options` may match any of the allowed options:
161+
162+
* [dense_vector index options](/reference/elasticsearch/mapping-reference/dense-vector.md#dense-vector-index-options).
163+
* [sparse_vector index options](/reference/elasticsearch/mapping-reference/sparse-vector.md#sparse-vectors-params). {applies_to}`stack: ga 9.2`
162164

163165
`chunking_settings` {applies_to}`stack: ga 9.1`
164166
: (Optional, object) Settings for chunking text into smaller passages.
@@ -410,7 +412,7 @@ stack: ga 9.0
410412
In case you want to customize data indexing, use the
411413
[`sparse_vector`](/reference/elasticsearch/mapping-reference/sparse-vector.md)
412414
or [`dense_vector`](/reference/elasticsearch/mapping-reference/dense-vector.md)
413-
field types and create an ingest pipeline with an
415+
field types and create an ingest pipeline with an
414416
[{{infer}} processor](/reference/enrich-processor/inference-processor.md) to
415417
generate the embeddings.
416418
[This tutorial](docs-content://solutions/search/semantic-search/semantic-search-inference.md)

docs/reference/text-analysis/analysis-lowercase-tokenfilter.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,15 @@ PUT lowercase_example
5858
: (Optional, string) Language-specific lowercase token filter to use. Valid values include:
5959

6060
`greek`
61-
: Uses Lucene’s [GreekLowerCaseFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/el/GreekLowerCaseFilter.md)
61+
: Uses Lucene’s [GreekLowerCaseFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/el/GreekLowerCaseFilter.html)
6262

6363
`irish`
64-
: Uses Lucene’s [IrishLowerCaseFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/ga/IrishLowerCaseFilter.md)
64+
: Uses Lucene’s [IrishLowerCaseFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/ga/IrishLowerCaseFilter.html)
6565

6666
`turkish`
67-
: Uses Lucene’s [TurkishLowerCaseFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.md)
67+
: Uses Lucene’s [TurkishLowerCaseFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.html)
6868

69-
If not specified, defaults to Lucene’s [LowerCaseFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/core/LowerCaseFilter.md).
69+
If not specified, defaults to Lucene’s [LowerCaseFilter](https://lucene.apache.org/core/10_0_0/analysis/common/org/apache/lucene/analysis/core/LowerCaseFilter.html).
7070

7171

7272

muted-tests.yml

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -413,15 +413,6 @@ tests:
413413
- class: org.elasticsearch.test.rest.yaml.RcsCcsCommonYamlTestSuiteIT
414414
method: test {p0=search/110_field_collapsing/field collapsing, inner_hits and maxConcurrentGroupRequests}
415415
issue: https://github.com/elastic/elasticsearch/issues/131348
416-
- class: org.elasticsearch.xpack.esql.vector.VectorSimilarityFunctionsIT
417-
method: testSimilarityBetweenConstantVectors {functionName=v_cosine similarityFunction=COSINE}
418-
issue: https://github.com/elastic/elasticsearch/issues/131361
419-
- class: org.elasticsearch.xpack.esql.vector.VectorSimilarityFunctionsIT
420-
method: testDifferentDimensions {functionName=v_cosine similarityFunction=COSINE}
421-
issue: https://github.com/elastic/elasticsearch/issues/131362
422-
- class: org.elasticsearch.xpack.esql.vector.VectorSimilarityFunctionsIT
423-
method: testSimilarityBetweenConstantVectorAndField {functionName=v_cosine similarityFunction=COSINE}
424-
issue: https://github.com/elastic/elasticsearch/issues/131363
425416
- class: org.elasticsearch.xpack.test.rest.XPackRestIT
426417
method: test {p0=ml/delete_expired_data/Test delete expired data with body parameters}
427418
issue: https://github.com/elastic/elasticsearch/issues/131364
@@ -485,9 +476,6 @@ tests:
485476
- class: org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapperTests
486477
method: testUpdates
487478
issue: https://github.com/elastic/elasticsearch/issues/131795
488-
- class: org.elasticsearch.xpack.esql.vector.VectorSimilarityFunctionsIT
489-
method: testDifferentDimensions {functionName=v_dot_product similarityFunction=DOT_PRODUCT}
490-
issue: https://github.com/elastic/elasticsearch/issues/131845
491479
- class: org.elasticsearch.xpack.restart.FullClusterRestartIT
492480
method: testWatcherWithApiKey {cluster=UPGRADED}
493481
issue: https://github.com/elastic/elasticsearch/issues/131964

server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java

Lines changed: 52 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
import org.elasticsearch.xcontent.DeprecationHandler;
4949
import org.elasticsearch.xcontent.NamedXContentRegistry;
5050
import org.elasticsearch.xcontent.ParseField;
51-
import org.elasticsearch.xcontent.ToXContent;
5251
import org.elasticsearch.xcontent.XContentBuilder;
5352
import org.elasticsearch.xcontent.XContentParser;
5453
import org.elasticsearch.xcontent.XContentParser.Token;
@@ -98,7 +97,7 @@ public static class Builder extends FieldMapper.Builder {
9897

9998
private final Parameter<Boolean> stored = Parameter.storeParam(m -> toType(m).fieldType().isStored(), false);
10099
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
101-
private final Parameter<IndexOptions> indexOptions = new Parameter<>(
100+
private final Parameter<SparseVectorIndexOptions> indexOptions = new Parameter<>(
102101
SPARSE_VECTOR_INDEX_OPTIONS,
103102
true,
104103
() -> null,
@@ -128,9 +127,9 @@ protected Parameter<?>[] getParameters() {
128127

129128
@Override
130129
public SparseVectorFieldMapper build(MapperBuilderContext context) {
131-
IndexOptions builderIndexOptions = indexOptions.getValue();
130+
SparseVectorIndexOptions builderIndexOptions = indexOptions.getValue();
132131
if (builderIndexOptions == null) {
133-
builderIndexOptions = getDefaultIndexOptions(indexVersionCreated);
132+
builderIndexOptions = SparseVectorIndexOptions.getDefaultIndexOptions(indexVersionCreated);
134133
}
135134

136135
final boolean syntheticVectorFinal = context.isSourceSynthetic() == false && isSyntheticVector;
@@ -149,33 +148,34 @@ public SparseVectorFieldMapper build(MapperBuilderContext context) {
149148
);
150149
}
151150

152-
private IndexOptions getDefaultIndexOptions(IndexVersion indexVersion) {
153-
return (indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION)
154-
|| indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0))
155-
? IndexOptions.DEFAULT_PRUNING_INDEX_OPTIONS
156-
: null;
151+
private boolean indexOptionsSerializerCheck(boolean includeDefaults, boolean isConfigured, SparseVectorIndexOptions value) {
152+
return includeDefaults || (SparseVectorIndexOptions.isDefaultOptions(value, indexVersionCreated) == false);
157153
}
158154

159-
private boolean indexOptionsSerializerCheck(boolean includeDefaults, boolean isConfigured, IndexOptions value) {
160-
return includeDefaults || (IndexOptions.isDefaultOptions(value, indexVersionCreated) == false);
155+
public void setIndexOptions(SparseVectorIndexOptions sparseVectorIndexOptions) {
156+
indexOptions.setValue(sparseVectorIndexOptions);
161157
}
162158
}
163159

164-
public IndexOptions getIndexOptions() {
160+
public SparseVectorIndexOptions getIndexOptions() {
165161
return fieldType().getIndexOptions();
166162
}
167163

168-
private static final ConstructingObjectParser<IndexOptions, Void> INDEX_OPTIONS_PARSER = new ConstructingObjectParser<>(
164+
private static final ConstructingObjectParser<SparseVectorIndexOptions, Void> INDEX_OPTIONS_PARSER = new ConstructingObjectParser<>(
169165
SPARSE_VECTOR_INDEX_OPTIONS,
170-
args -> new IndexOptions((Boolean) args[0], (TokenPruningConfig) args[1])
166+
args -> new SparseVectorIndexOptions((Boolean) args[0], (TokenPruningConfig) args[1])
171167
);
172168

173169
static {
174-
INDEX_OPTIONS_PARSER.declareBoolean(optionalConstructorArg(), IndexOptions.PRUNE_FIELD_NAME);
175-
INDEX_OPTIONS_PARSER.declareObject(optionalConstructorArg(), TokenPruningConfig.PARSER, IndexOptions.PRUNING_CONFIG_FIELD_NAME);
170+
INDEX_OPTIONS_PARSER.declareBoolean(optionalConstructorArg(), SparseVectorIndexOptions.PRUNE_FIELD_NAME);
171+
INDEX_OPTIONS_PARSER.declareObject(
172+
optionalConstructorArg(),
173+
TokenPruningConfig.PARSER,
174+
SparseVectorIndexOptions.PRUNING_CONFIG_FIELD_NAME
175+
);
176176
}
177177

178-
private static SparseVectorFieldMapper.IndexOptions parseIndexOptions(MappingParserContext context, Object propNode) {
178+
private static SparseVectorIndexOptions parseIndexOptions(MappingParserContext context, Object propNode) {
179179
if (propNode == null) {
180180
return null;
181181
}
@@ -212,7 +212,7 @@ private static SparseVectorFieldMapper.IndexOptions parseIndexOptions(MappingPar
212212

213213
public static final class SparseVectorFieldType extends MappedFieldType {
214214
private final IndexVersion indexVersionCreated;
215-
private final IndexOptions indexOptions;
215+
private final SparseVectorIndexOptions indexOptions;
216216

217217
public SparseVectorFieldType(IndexVersion indexVersionCreated, String name, boolean isStored, Map<String, String> meta) {
218218
this(indexVersionCreated, name, isStored, meta, null);
@@ -223,14 +223,14 @@ public SparseVectorFieldType(
223223
String name,
224224
boolean isStored,
225225
Map<String, String> meta,
226-
@Nullable SparseVectorFieldMapper.IndexOptions indexOptions
226+
@Nullable SparseVectorIndexOptions indexOptions
227227
) {
228228
super(name, true, isStored, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
229229
this.indexVersionCreated = indexVersionCreated;
230230
this.indexOptions = indexOptions;
231231
}
232232

233-
public IndexOptions getIndexOptions() {
233+
public SparseVectorIndexOptions getIndexOptions() {
234234
return indexOptions;
235235
}
236236

@@ -560,15 +560,18 @@ public void reset() {
560560
}
561561
}
562562

563-
public static class IndexOptions implements ToXContent {
563+
public static class SparseVectorIndexOptions implements IndexOptions {
564564
public static final ParseField PRUNE_FIELD_NAME = new ParseField("prune");
565565
public static final ParseField PRUNING_CONFIG_FIELD_NAME = new ParseField("pruning_config");
566-
public static final IndexOptions DEFAULT_PRUNING_INDEX_OPTIONS = new IndexOptions(true, new TokenPruningConfig());
566+
public static final SparseVectorIndexOptions DEFAULT_PRUNING_INDEX_OPTIONS = new SparseVectorIndexOptions(
567+
true,
568+
new TokenPruningConfig()
569+
);
567570

568571
final Boolean prune;
569572
final TokenPruningConfig pruningConfig;
570573

571-
IndexOptions(@Nullable Boolean prune, @Nullable TokenPruningConfig pruningConfig) {
574+
public SparseVectorIndexOptions(@Nullable Boolean prune, @Nullable TokenPruningConfig pruningConfig) {
572575
if (pruningConfig != null && (prune == null || prune == false)) {
573576
throw new IllegalArgumentException(
574577
"["
@@ -585,14 +588,37 @@ public static class IndexOptions implements ToXContent {
585588
this.pruningConfig = pruningConfig;
586589
}
587590

588-
public static boolean isDefaultOptions(IndexOptions indexOptions, IndexVersion indexVersion) {
589-
IndexOptions defaultIndexOptions = indexVersionSupportsDefaultPruningConfig(indexVersion)
591+
public static boolean isDefaultOptions(SparseVectorIndexOptions indexOptions, IndexVersion indexVersion) {
592+
SparseVectorIndexOptions defaultIndexOptions = indexVersionSupportsDefaultPruningConfig(indexVersion)
590593
? DEFAULT_PRUNING_INDEX_OPTIONS
591594
: null;
592595

593596
return Objects.equals(indexOptions, defaultIndexOptions);
594597
}
595598

599+
public static SparseVectorIndexOptions getDefaultIndexOptions(IndexVersion indexVersion) {
600+
return indexVersionSupportsDefaultPruningConfig(indexVersion) ? DEFAULT_PRUNING_INDEX_OPTIONS : null;
601+
}
602+
603+
public static SparseVectorIndexOptions parseFromMap(Map<String, Object> map) {
604+
if (map == null) {
605+
return null;
606+
}
607+
608+
try {
609+
XContentParser parser = new MapXContentParser(
610+
NamedXContentRegistry.EMPTY,
611+
DeprecationHandler.IGNORE_DEPRECATIONS,
612+
map,
613+
XContentType.JSON
614+
);
615+
616+
return INDEX_OPTIONS_PARSER.parse(parser, null);
617+
} catch (IOException ioEx) {
618+
throw new UncheckedIOException(ioEx);
619+
}
620+
}
621+
596622
public Boolean getPrune() {
597623
return prune;
598624
}
@@ -626,7 +652,7 @@ public final boolean equals(Object other) {
626652
return false;
627653
}
628654

629-
IndexOptions otherAsIndexOptions = (IndexOptions) other;
655+
SparseVectorIndexOptions otherAsIndexOptions = (SparseVectorIndexOptions) other;
630656
return Objects.equals(prune, otherAsIndexOptions.prune) && Objects.equals(pruningConfig, otherAsIndexOptions.pruningConfig);
631657
}
632658

server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -906,4 +906,8 @@ private Map<String, Float> toFloats(Map<String, ?> value) {
906906
}
907907
return result;
908908
}
909+
910+
public static IndexVersion getIndexOptionsCompatibleIndexVersion() {
911+
return IndexVersionUtils.randomVersionBetween(random(), SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT, IndexVersion.current());
912+
}
909913
}

server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,28 @@ public void testIsNotAggregatable() {
4040
MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType(indexVersion, "field", false, Collections.emptyMap());
4141
assertFalse(fieldType.isAggregatable());
4242
}
43+
44+
public static SparseVectorFieldMapper.SparseVectorIndexOptions randomSparseVectorIndexOptions() {
45+
return randomSparseVectorIndexOptions(true);
46+
}
47+
48+
public static SparseVectorFieldMapper.SparseVectorIndexOptions randomSparseVectorIndexOptions(boolean includeNull) {
49+
if (includeNull && randomBoolean()) {
50+
return null;
51+
}
52+
53+
Boolean prune = randomBoolean() ? null : randomBoolean();
54+
if (prune == null) {
55+
new SparseVectorFieldMapper.SparseVectorIndexOptions(null, null);
56+
}
57+
58+
if (prune == Boolean.FALSE) {
59+
new SparseVectorFieldMapper.SparseVectorIndexOptions(false, null);
60+
}
61+
62+
return new SparseVectorFieldMapper.SparseVectorIndexOptions(
63+
true,
64+
new TokenPruningConfig(randomFloatBetween(1.0f, 100.0f, true), randomFloatBetween(0.0f, 1.0f, true), randomBoolean())
65+
);
66+
}
4367
}

x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/AbstractTopBytesRefAggregatorFunctionTests.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.elasticsearch.compute.data.Block;
1212
import org.elasticsearch.compute.data.BlockFactory;
1313
import org.elasticsearch.compute.data.BlockUtils;
14+
import org.elasticsearch.compute.data.Page;
1415
import org.elasticsearch.compute.operator.SequenceBytesRefBlockSourceOperator;
1516
import org.elasticsearch.compute.operator.SourceOperator;
1617

@@ -30,8 +31,8 @@ protected final SourceOperator simpleInput(BlockFactory blockFactory, int size)
3031
protected abstract BytesRef randomValue();
3132

3233
@Override
33-
public final void assertSimpleOutput(List<Block> input, Block result) {
34-
Object[] values = input.stream().flatMap(AggregatorFunctionTestCase::allBytesRefs).sorted().limit(LIMIT).toArray(Object[]::new);
34+
public final void assertSimpleOutput(List<Page> input, Block result) {
35+
Object[] values = input.stream().flatMap(p -> allBytesRefs(p.getBlock(0))).sorted().limit(LIMIT).toArray(Object[]::new);
3536
assertThat((List<?>) BlockUtils.toJavaObject(result, 0), contains(values));
3637
}
3738
}

x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/AggregatorFunctionTestCase.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,12 @@ protected final int aggregatorIntermediateBlockCount() {
5858

5959
protected abstract String expectedDescriptionOfAggregator();
6060

61-
protected abstract void assertSimpleOutput(List<Block> input, Block result);
61+
/**
62+
* Assert that the result is correct given the input.
63+
* @param input the input pages built by {@link #simpleInput}
64+
* @param result the result of running {@link #aggregatorFunction()}
65+
*/
66+
protected abstract void assertSimpleOutput(List<Page> input, Block result);
6267

6368
@Override
6469
protected Operator.OperatorFactory simpleWithMode(SimpleOptions options, AggregatorMode mode) {
@@ -99,7 +104,7 @@ protected final void assertSimpleOutput(List<Page> input, List<Page> results) {
99104
assertThat(results.get(0).getPositionCount(), equalTo(1));
100105

101106
Block result = results.get(0).getBlock(0);
102-
assertSimpleOutput(input.stream().map(p -> p.<Block>getBlock(0)).toList(), result);
107+
assertSimpleOutput(input, result);
103108
}
104109

105110
public final void testIgnoresNulls() {

x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/CountAggregatorFunctionTests.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import org.elasticsearch.compute.data.Block;
1111
import org.elasticsearch.compute.data.BlockFactory;
1212
import org.elasticsearch.compute.data.LongBlock;
13+
import org.elasticsearch.compute.data.Page;
1314
import org.elasticsearch.compute.operator.SourceOperator;
1415
import org.elasticsearch.compute.test.SequenceLongBlockSourceOperator;
1516

@@ -36,8 +37,8 @@ protected String expectedDescriptionOfAggregator() {
3637
}
3738

3839
@Override
39-
protected void assertSimpleOutput(List<Block> input, Block result) {
40-
long count = input.stream().flatMapToLong(b -> allLongs(b)).count();
40+
protected void assertSimpleOutput(List<Page> input, Block result) {
41+
long count = input.stream().flatMapToLong(p -> allLongs(p.getBlock(0))).count();
4142
assertThat(((LongBlock) result).getLong(0), equalTo(count));
4243
}
4344

0 commit comments

Comments
 (0)