Skip to content
Merged
Show file tree
Hide file tree
Changes from 57 commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
d6d2900
add sparse vector index options to semantic text
markjhoy Jul 10, 2025
f20bd23
[CI] Auto commit changes from spotless
Jul 10, 2025
c3e7bbf
Merge branch 'main' into markjhoy/add-sparse_vector_index_options-sup…
markjhoy Jul 11, 2025
5ed517d
current tests - not 100% working yet
markjhoy Jul 14, 2025
78cd8f2
sparse_vector index options/createEmbeddingsField
markjhoy Jul 14, 2025
a95aaf6
set default index options if we don't have any
markjhoy Jul 14, 2025
e0e819c
Merge branch 'main' into markjhoy/add-sparse_vector_index_options-sup…
markjhoy Jul 14, 2025
6cdd56c
[CI] Auto commit changes from spotless
Jul 14, 2025
556b28d
remove redundant code; set defaults
markjhoy Jul 14, 2025
6ab12dd
fix tests
markjhoy Jul 15, 2025
78138da
add validation test
markjhoy Jul 15, 2025
f254f05
[CI] Auto commit changes from spotless
Jul 15, 2025
0f4579f
Merge branch 'main' into markjhoy/add-sparse_vector_index_options-sup…
markjhoy Jul 17, 2025
e6f3d76
Merge branch 'main' into markjhoy/add-sparse_vector_index_options-sup…
markjhoy Jul 20, 2025
0c43ccc
add additional tests
markjhoy Jul 20, 2025
c209159
[CI] Auto commit changes from spotless
Jul 20, 2025
a16b687
fix tests
markjhoy Jul 20, 2025
6d9cecc
[CI] Auto commit changes from spotless
Jul 20, 2025
3260326
... and fix tests...
markjhoy Jul 21, 2025
b434f8b
[CI] Auto commit changes from spotless
Jul 21, 2025
b50831a
fill in test specific sparse vector index options
markjhoy Jul 21, 2025
5d4803f
remove unused node feature
markjhoy Jul 21, 2025
9deddc0
[CI] Auto commit changes from spotless
Jul 21, 2025
99c3410
Update docs/changelog/131058.yaml
markjhoy Jul 21, 2025
a687eb8
update changelog
markjhoy Jul 21, 2025
6a8bc77
some cleanups; still needs a few more tests
markjhoy Jul 25, 2025
1ec72ae
[CI] Auto commit changes from spotless
Jul 25, 2025
e3bbccc
Merge branch 'main' into markjhoy/add-sparse_vector_index_options-sup…
markjhoy Jul 25, 2025
c0a9732
fix additional tests
markjhoy Jul 28, 2025
420914e
[CI] Auto commit changes from spotless
Jul 28, 2025
4a70860
and fix more tests
markjhoy Jul 28, 2025
570b2e4
... annnnd... fix more tests
markjhoy Jul 28, 2025
eb9bbee
[CI] Auto commit changes from spotless
Jul 28, 2025
cd26f2d
Merge branch 'main' into markjhoy/add-sparse_vector_index_options-sup…
markjhoy Jul 28, 2025
a1f177b
clean tests; add YAML Rest tests
markjhoy Jul 29, 2025
03e1b42
[CI] Auto commit changes from spotless
Jul 29, 2025
a89f014
Merge branch 'main' into markjhoy/add-sparse_vector_index_options-sup…
markjhoy Jul 29, 2025
10eb3bb
fix failing tests
markjhoy Jul 30, 2025
479bffa
[CI] Auto commit changes from spotless
Jul 30, 2025
46157d8
fix tests due to multiple random index versioning
markjhoy Jul 30, 2025
28ba8e1
[CI] Auto commit changes from spotless
Jul 30, 2025
6ea41e2
fix tests; fix yaml tests;
markjhoy Jul 31, 2025
59d5833
[CI] Auto commit changes from spotless
Jul 31, 2025
8032b48
Merge branch 'main' into markjhoy/add-sparse_vector_index_options-sup…
markjhoy Jul 31, 2025
0504b2e
fix more tests due to random index versioning
markjhoy Jul 31, 2025
29fba01
[CI] Auto commit changes from spotless
Jul 31, 2025
a3ef0e3
... and more test cleeaning
markjhoy Jul 31, 2025
ec2bcaf
[CI] Auto commit changes from spotless
Jul 31, 2025
4d0b220
Merge branch 'main' into markjhoy/add-sparse_vector_index_options-sup…
markjhoy Jul 31, 2025
c329891
Merge branch 'main' into markjhoy/add-sparse_vector_index_options-sup…
markjhoy Jul 31, 2025
6cdca2e
Merge branch 'main' into markjhoy/add-sparse_vector_index_options-sup…
markjhoy Aug 1, 2025
a024298
add link to sparse_vector index_options for docs
markjhoy Aug 1, 2025
8887d20
fix docs
markjhoy Aug 1, 2025
d822bab
fix tests; remove old code
markjhoy Aug 1, 2025
885a469
correct tests; simplify mocking/spy ModelRegistry
markjhoy Aug 1, 2025
03ad8a3
add test for dense vector w/ sparse index options
markjhoy Aug 1, 2025
dff8220
[CI] Auto commit changes from spotless
Aug 1, 2025
ca4e533
Merge branch 'main' into markjhoy/add-sparse_vector_index_options-sup…
markjhoy Aug 1, 2025
3e11409
collapse multiple "@before" methods
markjhoy Aug 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/131058.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 131058
summary: Adds sparse vector index options settings to semantic_text field
area: Search
type: enhancement
issues: []
10 changes: 6 additions & 4 deletions docs/reference/elasticsearch/mapping-reference/semantic-text.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,11 @@ to create the endpoint. If not specified, the {{infer}} endpoint defined by

`index_options` {applies_to}`stack: ga 9.1`
: (Optional, object) Specifies the index options to override default values
for the field. Currently, `dense_vector` index options are supported.
For text embeddings, `index_options` may match any allowed
[dense_vector index options](/reference/elasticsearch/mapping-reference/dense-vector.md#dense-vector-index-options).
for the field. Currently, `dense_vector` and `sparse_vector` index options are supported.
`index_options` may match any of the allowed options for the corresponding vector field type:

* [dense_vector index options](/reference/elasticsearch/mapping-reference/dense-vector.md#dense-vector-index-options).
* [sparse_vector index options](/reference/elasticsearch/mapping-reference/sparse-vector.md#sparse-vectors-params). {applies_to}`stack: ga 9.2`

`chunking_settings` {applies_to}`stack: ga 9.1`
: (Optional, object) Settings for chunking text into smaller passages.
Expand Down Expand Up @@ -410,7 +412,7 @@ stack: ga 9.0
In case you want to customize data indexing, use the
[`sparse_vector`](/reference/elasticsearch/mapping-reference/sparse-vector.md)
or [`dense_vector`](/reference/elasticsearch/mapping-reference/dense-vector.md)
field types and create an ingest pipeline with an
field types and create an ingest pipeline with an
[{{infer}} processor](/reference/enrich-processor/inference-processor.md) to
generate the embeddings.
[This tutorial](docs-content://solutions/search/semantic-search/semantic-search-inference.md)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
import org.elasticsearch.xcontent.DeprecationHandler;
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParser.Token;
Expand Down Expand Up @@ -98,7 +97,7 @@ public static class Builder extends FieldMapper.Builder {

private final Parameter<Boolean> stored = Parameter.storeParam(m -> toType(m).fieldType().isStored(), false);
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
private final Parameter<IndexOptions> indexOptions = new Parameter<>(
private final Parameter<SparseVectorIndexOptions> indexOptions = new Parameter<>(
SPARSE_VECTOR_INDEX_OPTIONS,
true,
() -> null,
Expand Down Expand Up @@ -128,9 +127,9 @@ protected Parameter<?>[] getParameters() {

@Override
public SparseVectorFieldMapper build(MapperBuilderContext context) {
IndexOptions builderIndexOptions = indexOptions.getValue();
SparseVectorIndexOptions builderIndexOptions = indexOptions.getValue();
if (builderIndexOptions == null) {
builderIndexOptions = getDefaultIndexOptions(indexVersionCreated);
builderIndexOptions = SparseVectorIndexOptions.getDefaultIndexOptions(indexVersionCreated);
}

final boolean syntheticVectorFinal = context.isSourceSynthetic() == false && isSyntheticVector;
Expand All @@ -149,33 +148,34 @@ public SparseVectorFieldMapper build(MapperBuilderContext context) {
);
}

private IndexOptions getDefaultIndexOptions(IndexVersion indexVersion) {
return (indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION)
|| indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0))
? IndexOptions.DEFAULT_PRUNING_INDEX_OPTIONS
: null;
/**
 * Decides whether the index options parameter should be serialized.
 *
 * @param includeDefaults when {@code true} the options are always serialized
 * @param isConfigured    not consulted by this check
 * @param value           the index options value being considered
 * @return {@code true} if defaults are requested, or if {@code value} is not the default
 *         for the index version this builder was created with
 */
private boolean indexOptionsSerializerCheck(boolean includeDefaults, boolean isConfigured, SparseVectorIndexOptions value) {
    if (includeDefaults) {
        return true;
    }
    // only non-default options are worth writing out
    return SparseVectorIndexOptions.isDefaultOptions(value, indexVersionCreated) == false;
}

private boolean indexOptionsSerializerCheck(boolean includeDefaults, boolean isConfigured, IndexOptions value) {
return includeDefaults || (IndexOptions.isDefaultOptions(value, indexVersionCreated) == false);
/**
 * Sets the value of this builder's index options parameter.
 *
 * @param sparseVectorIndexOptions the index options to store on the parameter
 */
public void setIndexOptions(SparseVectorIndexOptions sparseVectorIndexOptions) {
    this.indexOptions.setValue(sparseVectorIndexOptions);
}
}

public IndexOptions getIndexOptions() {
/**
 * Returns the index options held by this mapper's field type.
 *
 * @return whatever {@code fieldType().getIndexOptions()} reports
 */
public SparseVectorIndexOptions getIndexOptions() {
    return this.fieldType().getIndexOptions();
}

private static final ConstructingObjectParser<IndexOptions, Void> INDEX_OPTIONS_PARSER = new ConstructingObjectParser<>(
private static final ConstructingObjectParser<SparseVectorIndexOptions, Void> INDEX_OPTIONS_PARSER = new ConstructingObjectParser<>(
SPARSE_VECTOR_INDEX_OPTIONS,
args -> new IndexOptions((Boolean) args[0], (TokenPruningConfig) args[1])
args -> new SparseVectorIndexOptions((Boolean) args[0], (TokenPruningConfig) args[1])
);

static {
INDEX_OPTIONS_PARSER.declareBoolean(optionalConstructorArg(), IndexOptions.PRUNE_FIELD_NAME);
INDEX_OPTIONS_PARSER.declareObject(optionalConstructorArg(), TokenPruningConfig.PARSER, IndexOptions.PRUNING_CONFIG_FIELD_NAME);
INDEX_OPTIONS_PARSER.declareBoolean(optionalConstructorArg(), SparseVectorIndexOptions.PRUNE_FIELD_NAME);
INDEX_OPTIONS_PARSER.declareObject(
optionalConstructorArg(),
TokenPruningConfig.PARSER,
SparseVectorIndexOptions.PRUNING_CONFIG_FIELD_NAME
);
}

private static SparseVectorFieldMapper.IndexOptions parseIndexOptions(MappingParserContext context, Object propNode) {
private static SparseVectorIndexOptions parseIndexOptions(MappingParserContext context, Object propNode) {
if (propNode == null) {
return null;
}
Expand Down Expand Up @@ -212,7 +212,7 @@ private static SparseVectorFieldMapper.IndexOptions parseIndexOptions(MappingPar

public static final class SparseVectorFieldType extends MappedFieldType {
private final IndexVersion indexVersionCreated;
private final IndexOptions indexOptions;
private final SparseVectorIndexOptions indexOptions;

public SparseVectorFieldType(IndexVersion indexVersionCreated, String name, boolean isStored, Map<String, String> meta) {
this(indexVersionCreated, name, isStored, meta, null);
Expand All @@ -223,14 +223,14 @@ public SparseVectorFieldType(
String name,
boolean isStored,
Map<String, String> meta,
@Nullable SparseVectorFieldMapper.IndexOptions indexOptions
@Nullable SparseVectorIndexOptions indexOptions
) {
super(name, true, isStored, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
this.indexVersionCreated = indexVersionCreated;
this.indexOptions = indexOptions;
}

public IndexOptions getIndexOptions() {
/**
 * Returns the index options this field type was constructed with.
 *
 * @return the stored index options; may be {@code null}, since the constructor accepts a nullable value
 */
public SparseVectorIndexOptions getIndexOptions() {
    return this.indexOptions;
}

Expand Down Expand Up @@ -560,15 +560,18 @@ public void reset() {
}
}

public static class IndexOptions implements ToXContent {
public static class SparseVectorIndexOptions implements IndexOptions {
public static final ParseField PRUNE_FIELD_NAME = new ParseField("prune");
public static final ParseField PRUNING_CONFIG_FIELD_NAME = new ParseField("pruning_config");
public static final IndexOptions DEFAULT_PRUNING_INDEX_OPTIONS = new IndexOptions(true, new TokenPruningConfig());
public static final SparseVectorIndexOptions DEFAULT_PRUNING_INDEX_OPTIONS = new SparseVectorIndexOptions(
true,
new TokenPruningConfig()
);

final Boolean prune;
final TokenPruningConfig pruningConfig;

IndexOptions(@Nullable Boolean prune, @Nullable TokenPruningConfig pruningConfig) {
public SparseVectorIndexOptions(@Nullable Boolean prune, @Nullable TokenPruningConfig pruningConfig) {
if (pruningConfig != null && (prune == null || prune == false)) {
throw new IllegalArgumentException(
"["
Expand All @@ -585,14 +588,37 @@ public static class IndexOptions implements ToXContent {
this.pruningConfig = pruningConfig;
}

public static boolean isDefaultOptions(IndexOptions indexOptions, IndexVersion indexVersion) {
IndexOptions defaultIndexOptions = indexVersionSupportsDefaultPruningConfig(indexVersion)
/**
 * Tells whether the given options equal the defaults for the given index version.
 *
 * @param indexOptions the options to test; may be {@code null}
 * @param indexVersion the index version that determines which defaults apply
 * @return {@code true} if {@code indexOptions} equals {@link #DEFAULT_PRUNING_INDEX_OPTIONS} on versions
 *         that support default pruning config, or is {@code null} on versions that do not
 */
public static boolean isDefaultOptions(SparseVectorIndexOptions indexOptions, IndexVersion indexVersion) {
    final SparseVectorIndexOptions expectedDefaults;
    if (indexVersionSupportsDefaultPruningConfig(indexVersion)) {
        expectedDefaults = DEFAULT_PRUNING_INDEX_OPTIONS;
    } else {
        // versions without default pruning support have no default options at all
        expectedDefaults = null;
    }

    return Objects.equals(indexOptions, expectedDefaults);
}

/**
 * Returns the default index options for the given index version.
 *
 * @param indexVersion the index version to look up defaults for
 * @return {@link #DEFAULT_PRUNING_INDEX_OPTIONS} when the version supports default pruning config,
 *         otherwise {@code null}
 */
public static SparseVectorIndexOptions getDefaultIndexOptions(IndexVersion indexVersion) {
    if (indexVersionSupportsDefaultPruningConfig(indexVersion) == false) {
        return null;
    }
    return DEFAULT_PRUNING_INDEX_OPTIONS;
}

/**
 * Parses sparse vector index options from a map representation.
 *
 * @param map the index options as a map; may be {@code null}
 * @return the parsed options, or {@code null} when {@code map} is {@code null}
 * @throws UncheckedIOException if parsing or closing the parser fails with an {@link IOException}
 */
public static SparseVectorIndexOptions parseFromMap(Map<String, Object> map) {
    if (map == null) {
        return null;
    }

    // try-with-resources so the parser is always closed, including when parsing throws
    try (
        XContentParser parser = new MapXContentParser(
            NamedXContentRegistry.EMPTY,
            DeprecationHandler.IGNORE_DEPRECATIONS,
            map,
            XContentType.JSON
        )
    ) {
        return INDEX_OPTIONS_PARSER.parse(parser, null);
    } catch (IOException ioEx) {
        throw new UncheckedIOException(ioEx);
    }
}

public Boolean getPrune() {
return prune;
}
Expand Down Expand Up @@ -626,7 +652,7 @@ public final boolean equals(Object other) {
return false;
}

IndexOptions otherAsIndexOptions = (IndexOptions) other;
SparseVectorIndexOptions otherAsIndexOptions = (SparseVectorIndexOptions) other;
return Objects.equals(prune, otherAsIndexOptions.prune) && Objects.equals(pruningConfig, otherAsIndexOptions.pruningConfig);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -906,4 +906,8 @@ private Map<String, Float> toFloats(Map<String, ?> value) {
}
return result;
}

/**
 * Picks a random index version for tests that need sparse vector index options support.
 *
 * @return a version chosen by {@code IndexVersionUtils.randomVersionBetween} using
 *         {@code SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT} and {@code IndexVersion.current()} as the bounds
 *         (NOTE(review): presumably both bounds are inclusive - confirm against IndexVersionUtils)
 */
public static IndexVersion getIndexOptionsCompatibleIndexVersion() {
return IndexVersionUtils.randomVersionBetween(random(), SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT, IndexVersion.current());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,28 @@ public void testIsNotAggregatable() {
MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType(indexVersion, "field", false, Collections.emptyMap());
assertFalse(fieldType.isAggregatable());
}

/**
 * Builds random sparse vector index options, allowing a {@code null} result.
 *
 * @return the result of {@link #randomSparseVectorIndexOptions(boolean)} called with {@code true}
 */
public static SparseVectorFieldMapper.SparseVectorIndexOptions randomSparseVectorIndexOptions() {
    final boolean includeNull = true;
    return randomSparseVectorIndexOptions(includeNull);
}

/**
 * Builds random sparse vector index options for tests.
 * <p>
 * The result is one of: {@code null} (only when {@code includeNull} is set), options with pruning
 * unspecified, options with pruning explicitly disabled, or options with pruning enabled and a
 * random pruning config.
 *
 * @param includeNull whether {@code null} is an allowed result
 * @return random index options, or {@code null} if {@code includeNull} is {@code true} and the coin flip says so
 */
public static SparseVectorFieldMapper.SparseVectorIndexOptions randomSparseVectorIndexOptions(boolean includeNull) {
    if (includeNull && randomBoolean()) {
        return null;
    }

    Boolean prune = randomBoolean() ? null : randomBoolean();
    if (prune == null) {
        // pruning left unspecified; no pruning config is supplied in that case
        return new SparseVectorFieldMapper.SparseVectorIndexOptions(null, null);
    }

    // value comparison rather than reference comparison on the boxed Boolean
    if (Boolean.FALSE.equals(prune)) {
        // pruning explicitly disabled; no pruning config is supplied in that case
        return new SparseVectorFieldMapper.SparseVectorIndexOptions(false, null);
    }

    return new SparseVectorFieldMapper.SparseVectorIndexOptions(
        true,
        new TokenPruningConfig(randomFloatBetween(1.0f, 100.0f, true), randomFloatBetween(0.0f, 1.0f, true), randomBoolean())
    );
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS;
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_INDEX_OPTIONS;
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_INDEX_OPTIONS_WITH_DEFAULTS;
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_SPARSE_VECTOR_INDEX_OPTIONS;
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG;
import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_FILTER_FIX;
import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED;
Expand Down Expand Up @@ -78,7 +79,8 @@ public Set<NodeFeature> getTestFeatures() {
COHERE_V2_API,
SEMANTIC_TEXT_INDEX_OPTIONS_WITH_DEFAULTS,
SEMANTIC_QUERY_REWRITE_INTERCEPTORS_PROPAGATE_BOOST_AND_QUERY_NAME_FIX,
SEMANTIC_TEXT_HIGHLIGHTING_FLAT
SEMANTIC_TEXT_HIGHLIGHTING_FLAT,
SEMANTIC_TEXT_SPARSE_VECTOR_INDEX_OPTIONS
)
);
if (RERANK_SNIPPETS.isEnabled()) {
Expand Down
Loading
Loading