Skip to content

Commit d6d2900

Browse files
committed
add sparse vector index options to semantic text
1 parent 7380179 commit d6d2900

File tree

4 files changed

+102
-28
lines changed

4 files changed

+102
-28
lines changed

server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java

Lines changed: 53 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
import org.elasticsearch.xcontent.DeprecationHandler;
4949
import org.elasticsearch.xcontent.NamedXContentRegistry;
5050
import org.elasticsearch.xcontent.ParseField;
51-
import org.elasticsearch.xcontent.ToXContent;
5251
import org.elasticsearch.xcontent.XContentBuilder;
5352
import org.elasticsearch.xcontent.XContentParser;
5453
import org.elasticsearch.xcontent.XContentParser.Token;
@@ -98,7 +97,7 @@ public static class Builder extends FieldMapper.Builder {
9897

9998
private final Parameter<Boolean> stored = Parameter.storeParam(m -> toType(m).fieldType().isStored(), false);
10099
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
101-
private final Parameter<IndexOptions> indexOptions = new Parameter<>(
100+
private final Parameter<SparseVectorIndexOptions> indexOptions = new Parameter<>(
102101
SPARSE_VECTOR_INDEX_OPTIONS,
103102
true,
104103
() -> null,
@@ -128,7 +127,7 @@ protected Parameter<?>[] getParameters() {
128127

129128
@Override
130129
public SparseVectorFieldMapper build(MapperBuilderContext context) {
131-
IndexOptions builderIndexOptions = indexOptions.getValue();
130+
SparseVectorIndexOptions builderIndexOptions = indexOptions.getValue();
132131
if (builderIndexOptions == null) {
133132
builderIndexOptions = getDefaultIndexOptions(indexVersionCreated);
134133
}
@@ -149,33 +148,37 @@ public SparseVectorFieldMapper build(MapperBuilderContext context) {
149148
);
150149
}
151150

152-
private IndexOptions getDefaultIndexOptions(IndexVersion indexVersion) {
151+
private SparseVectorIndexOptions getDefaultIndexOptions(IndexVersion indexVersion) {
153152
return (indexVersion.onOrAfter(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION)
154153
|| indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0))
155-
? IndexOptions.DEFAULT_PRUNING_INDEX_OPTIONS
154+
? SparseVectorIndexOptions.DEFAULT_PRUNING_INDEX_OPTIONS
156155
: null;
157156
}
158157

159-
private boolean indexOptionsSerializerCheck(boolean includeDefaults, boolean isConfigured, IndexOptions value) {
160-
return includeDefaults || (IndexOptions.isDefaultOptions(value, indexVersionCreated) == false);
158+
private boolean indexOptionsSerializerCheck(boolean includeDefaults, boolean isConfigured, SparseVectorIndexOptions value) {
159+
return includeDefaults || (SparseVectorIndexOptions.isDefaultOptions(value, indexVersionCreated) == false);
161160
}
162161
}
163162

164-
public IndexOptions getIndexOptions() {
163+
public SparseVectorIndexOptions getIndexOptions() {
165164
return fieldType().getIndexOptions();
166165
}
167166

168-
private static final ConstructingObjectParser<IndexOptions, Void> INDEX_OPTIONS_PARSER = new ConstructingObjectParser<>(
167+
private static final ConstructingObjectParser<SparseVectorIndexOptions, Void> INDEX_OPTIONS_PARSER = new ConstructingObjectParser<>(
169168
SPARSE_VECTOR_INDEX_OPTIONS,
170-
args -> new IndexOptions((Boolean) args[0], (TokenPruningConfig) args[1])
169+
args -> new SparseVectorIndexOptions((Boolean) args[0], (TokenPruningConfig) args[1])
171170
);
172171

173172
static {
174-
INDEX_OPTIONS_PARSER.declareBoolean(optionalConstructorArg(), IndexOptions.PRUNE_FIELD_NAME);
175-
INDEX_OPTIONS_PARSER.declareObject(optionalConstructorArg(), TokenPruningConfig.PARSER, IndexOptions.PRUNING_CONFIG_FIELD_NAME);
173+
INDEX_OPTIONS_PARSER.declareBoolean(optionalConstructorArg(), SparseVectorIndexOptions.PRUNE_FIELD_NAME);
174+
INDEX_OPTIONS_PARSER.declareObject(
175+
optionalConstructorArg(),
176+
TokenPruningConfig.PARSER,
177+
SparseVectorIndexOptions.PRUNING_CONFIG_FIELD_NAME
178+
);
176179
}
177180

178-
private static SparseVectorFieldMapper.IndexOptions parseIndexOptions(MappingParserContext context, Object propNode) {
181+
private static SparseVectorIndexOptions parseIndexOptions(MappingParserContext context, Object propNode) {
179182
if (propNode == null) {
180183
return null;
181184
}
@@ -212,7 +215,7 @@ private static SparseVectorFieldMapper.IndexOptions parseIndexOptions(MappingPar
212215

213216
public static final class SparseVectorFieldType extends MappedFieldType {
214217
private final IndexVersion indexVersionCreated;
215-
private final IndexOptions indexOptions;
218+
private final SparseVectorIndexOptions indexOptions;
216219

217220
public SparseVectorFieldType(IndexVersion indexVersionCreated, String name, boolean isStored, Map<String, String> meta) {
218221
this(indexVersionCreated, name, isStored, meta, null);
@@ -223,14 +226,14 @@ public SparseVectorFieldType(
223226
String name,
224227
boolean isStored,
225228
Map<String, String> meta,
226-
@Nullable SparseVectorFieldMapper.IndexOptions indexOptions
229+
@Nullable SparseVectorIndexOptions indexOptions
227230
) {
228231
super(name, true, isStored, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
229232
this.indexVersionCreated = indexVersionCreated;
230233
this.indexOptions = indexOptions;
231234
}
232235

233-
public IndexOptions getIndexOptions() {
236+
public SparseVectorIndexOptions getIndexOptions() {
234237
return indexOptions;
235238
}
236239

@@ -560,15 +563,18 @@ public void reset() {
560563
}
561564
}
562565

563-
public static class IndexOptions implements ToXContent {
566+
public static class SparseVectorIndexOptions implements IndexOptions {
564567
public static final ParseField PRUNE_FIELD_NAME = new ParseField("prune");
565568
public static final ParseField PRUNING_CONFIG_FIELD_NAME = new ParseField("pruning_config");
566-
public static final IndexOptions DEFAULT_PRUNING_INDEX_OPTIONS = new IndexOptions(true, new TokenPruningConfig());
569+
public static final SparseVectorIndexOptions DEFAULT_PRUNING_INDEX_OPTIONS = new SparseVectorIndexOptions(
570+
true,
571+
new TokenPruningConfig()
572+
);
567573

568574
final Boolean prune;
569575
final TokenPruningConfig pruningConfig;
570576

571-
IndexOptions(@Nullable Boolean prune, @Nullable TokenPruningConfig pruningConfig) {
577+
SparseVectorIndexOptions(@Nullable Boolean prune, @Nullable TokenPruningConfig pruningConfig) {
572578
if (pruningConfig != null && (prune == null || prune == false)) {
573579
throw new IllegalArgumentException(
574580
"["
@@ -585,14 +591,39 @@ public static class IndexOptions implements ToXContent {
585591
this.pruningConfig = pruningConfig;
586592
}
587593

588-
public static boolean isDefaultOptions(IndexOptions indexOptions, IndexVersion indexVersion) {
589-
IndexOptions defaultIndexOptions = indexVersionSupportsDefaultPruningConfig(indexVersion)
594+
public static boolean isDefaultOptions(SparseVectorIndexOptions indexOptions, IndexVersion indexVersion) {
595+
SparseVectorIndexOptions defaultIndexOptions = indexVersionSupportsDefaultPruningConfig(indexVersion)
590596
? DEFAULT_PRUNING_INDEX_OPTIONS
591597
: null;
592598

593599
return Objects.equals(indexOptions, defaultIndexOptions);
594600
}
595601

602+
public static SparseVectorIndexOptions getDefaultIndexOptions(IndexVersion indexVersion) {
603+
return indexVersionSupportsDefaultPruningConfig(indexVersion) ? DEFAULT_PRUNING_INDEX_OPTIONS : null;
604+
}
605+
606+
public static SparseVectorIndexOptions parseFromMap(Map<String, Object> map) {
607+
Boolean prune = null;
608+
TokenPruningConfig pruningConfig = null;
609+
610+
for (Map.Entry<String, Object> entry : map.entrySet()) {
611+
if (entry.getKey().equals(PRUNE_FIELD_NAME.getPreferredName())) {
612+
prune = XContentMapValues.nodeBooleanValue(entry.getValue());
613+
} else if (entry.getKey().equals(PRUNING_CONFIG_FIELD_NAME.getPreferredName())) {
614+
Map<String, Object> pruningConfigMap = XContentMapValues.nodeMapValue(
615+
entry.getValue(),
616+
PRUNING_CONFIG_FIELD_NAME.getPreferredName()
617+
);
618+
pruningConfig = TokenPruningConfig.parseFromMap(pruningConfigMap);
619+
} else {
620+
throw new IllegalArgumentException("Unsupported index option field for sparse_vector: " + entry.getKey());
621+
}
622+
}
623+
624+
return new SparseVectorIndexOptions(prune, pruningConfig);
625+
}
626+
596627
public Boolean getPrune() {
597628
return prune;
598629
}
@@ -626,7 +657,7 @@ public final boolean equals(Object other) {
626657
return false;
627658
}
628659

629-
IndexOptions otherAsIndexOptions = (IndexOptions) other;
660+
SparseVectorIndexOptions otherAsIndexOptions = (SparseVectorIndexOptions) other;
630661
return Objects.equals(prune, otherAsIndexOptions.prune) && Objects.equals(pruningConfig, otherAsIndexOptions.pruningConfig);
631662
}
632663

server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldTypeTests.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,16 @@ public void testIsNotAggregatable() {
4040
MappedFieldType fieldType = new SparseVectorFieldMapper.SparseVectorFieldType(indexVersion, "field", false, Collections.emptyMap());
4141
assertFalse(fieldType.isAggregatable());
4242
}
43+
44+
public static SparseVectorFieldMapper.SparseVectorIndexOptions randomSparseVectorIndexOptions() {
45+
if (randomBoolean() == false) {
46+
return new SparseVectorFieldMapper.SparseVectorIndexOptions(false, null);
47+
}
48+
49+
return new SparseVectorFieldMapper.SparseVectorIndexOptions(true, new TokenPruningConfig(
50+
randomFloatBetween(1.0f, 100.0f, true),
51+
randomFloatBetween(0.0f, 1.0f, true),
52+
randomBoolean()
53+
));
54+
}
4355
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,9 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
143143
public static final NodeFeature SEMANTIC_TEXT_INDEX_OPTIONS_WITH_DEFAULTS = new NodeFeature(
144144
"semantic_text.index_options_with_defaults"
145145
);
146+
public static final NodeFeature SEMANTIC_TEXT_SPARSE_VECTOR_INDEX_OPTIONS_SUPPORT = new NodeFeature(
147+
"semantic_text.sparse_vector_index_options_support"
148+
);
146149

147150
public static final String CONTENT_TYPE = "semantic_text";
148151
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;
@@ -459,7 +462,6 @@ private void validateIndexOptions(SemanticTextIndexOptions indexOptions, String
459462
}
460463

461464
if (indexOptions.type() == SemanticTextIndexOptions.SupportedIndexOptions.DENSE_VECTOR) {
462-
463465
if (modelSettings.taskType() != TEXT_EMBEDDING) {
464466
throw new IllegalArgumentException(
465467
"Invalid task type for index options, required [" + TEXT_EMBEDDING + "] but was [" + modelSettings.taskType() + "]"
@@ -470,8 +472,10 @@ private void validateIndexOptions(SemanticTextIndexOptions indexOptions, String
470472
DenseVectorFieldMapper.DenseVectorIndexOptions denseVectorIndexOptions =
471473
(DenseVectorFieldMapper.DenseVectorIndexOptions) indexOptions.indexOptions();
472474
denseVectorIndexOptions.validate(modelSettings.elementType(), dims, true);
475+
} else if (indexOptions.type() == SemanticTextIndexOptions.SupportedIndexOptions.SPARSE_VECTOR) {
476+
// No explicit validation is needed here: the SparseVectorIndexOptions constructor already validates the options
477+
indexOptions.indexOptions();
473478
}
474-
475479
}
476480

477481
/**
@@ -1259,23 +1263,28 @@ public static DenseVectorFieldMapper.DenseVectorIndexOptions defaultBbqHnswDense
12591263
}
12601264

12611265
static SemanticTextIndexOptions defaultIndexOptions(IndexVersion indexVersionCreated, MinimalServiceSettings modelSettings) {
1262-
12631266
if (modelSettings == null) {
12641267
return null;
12651268
}
12661269

1267-
SemanticTextIndexOptions defaultIndexOptions = null;
12681270
if (modelSettings.taskType() == TaskType.TEXT_EMBEDDING) {
12691271
DenseVectorFieldMapper.DenseVectorIndexOptions denseVectorIndexOptions = defaultDenseVectorIndexOptions(
12701272
indexVersionCreated,
12711273
modelSettings
12721274
);
1273-
defaultIndexOptions = denseVectorIndexOptions == null
1275+
return denseVectorIndexOptions == null
12741276
? null
12751277
: new SemanticTextIndexOptions(SemanticTextIndexOptions.SupportedIndexOptions.DENSE_VECTOR, denseVectorIndexOptions);
12761278
}
12771279

1278-
return defaultIndexOptions;
1280+
if (modelSettings.taskType() == SPARSE_EMBEDDING) {
1281+
return new SemanticTextIndexOptions(
1282+
SemanticTextIndexOptions.SupportedIndexOptions.SPARSE_VECTOR,
1283+
SparseVectorFieldMapper.SparseVectorIndexOptions.getDefaultIndexOptions(indexVersionCreated)
1284+
);
1285+
}
1286+
1287+
return null;
12791288
}
12801289

12811290
private static boolean canMergeModelSettings(MinimalServiceSettings previous, MinimalServiceSettings current, Conflicts conflicts) {

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextIndexOptions.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import org.elasticsearch.index.IndexVersion;
1414
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
1515
import org.elasticsearch.index.mapper.vectors.IndexOptions;
16+
import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper;
1617
import org.elasticsearch.xcontent.ToXContent;
1718
import org.elasticsearch.xcontent.XContentBuilder;
1819

@@ -76,6 +77,12 @@ public enum SupportedIndexOptions {
7677
public IndexOptions parseIndexOptions(String fieldName, Map<String, Object> map, IndexVersion indexVersion) {
7778
return parseDenseVectorIndexOptionsFromMap(fieldName, map, indexVersion);
7879
}
80+
},
81+
SPARSE_VECTOR("sparse_vector") {
82+
@Override
83+
public IndexOptions parseIndexOptions(String fieldName, Map<String, Object> map, IndexVersion indexVersion) {
84+
return parseSparseVectorIndexOptionsFromMap(map);
85+
}
7986
};
8087

8188
public final String value;
@@ -127,4 +134,19 @@ private static DenseVectorFieldMapper.DenseVectorIndexOptions parseDenseVectorIn
127134
throw new ElasticsearchException(exc);
128135
}
129136
}
137+
138+
private static SparseVectorFieldMapper.SparseVectorIndexOptions parseSparseVectorIndexOptionsFromMap(
139+
Map<String, Object> map
140+
) {
141+
try {
142+
Object type = map.remove(TYPE_FIELD);
143+
if (type == null) {
144+
throw new IllegalArgumentException("Required " + TYPE_FIELD);
145+
}
146+
147+
return SparseVectorFieldMapper.SparseVectorIndexOptions.parseFromMap(map);
148+
} catch (Exception exc) {
149+
throw new ElasticsearchException(exc);
150+
}
151+
}
130152
}

0 commit comments

Comments
 (0)