Skip to content

Commit 5361d29

Browse files
committed
Add custom serializer
1 parent 876c456 commit 5361d29

File tree

2 files changed

+94
-38
lines changed

2 files changed

+94
-38
lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 81 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
import org.elasticsearch.inference.InferenceResults;
7070
import org.elasticsearch.inference.MinimalServiceSettings;
7171
import org.elasticsearch.inference.SimilarityMeasure;
72+
import org.elasticsearch.inference.TaskType;
7273
import org.elasticsearch.search.fetch.StoredFieldsSpec;
7374
import org.elasticsearch.search.lookup.Source;
7475
import org.elasticsearch.search.vectors.KnnVectorQueryBuilder;
@@ -166,19 +167,9 @@ public static BiConsumer<String, MappingParserContext> validateParserContext(Str
166167
public static class Builder extends FieldMapper.Builder {
167168
private final ModelRegistry modelRegistry;
168169
private final boolean useLegacyFormat;
170+
private final IndexVersion indexVersionCreated;
169171

170-
private final Parameter<String> inferenceId = Parameter.stringParam(
171-
INFERENCE_ID_FIELD,
172-
false,
173-
mapper -> ((SemanticTextFieldType) mapper.fieldType()).inferenceId,
174-
DEFAULT_ELSER_2_INFERENCE_ID
175-
).addValidator(v -> {
176-
if (Strings.isEmpty(v)) {
177-
throw new IllegalArgumentException(
178-
"[" + INFERENCE_ID_FIELD + "] on mapper [" + leafName() + "] of type [" + CONTENT_TYPE + "] must not be empty"
179-
);
180-
}
181-
}).alwaysSerialize();
172+
private final Parameter<String> inferenceId;
182173

183174
private final Parameter<String> searchInferenceId = Parameter.stringParam(
184175
SEARCH_INFERENCE_ID_FIELD,
@@ -193,25 +184,9 @@ public static class Builder extends FieldMapper.Builder {
193184
}
194185
});
195186

196-
private final Parameter<MinimalServiceSettings> modelSettings = new Parameter<>(
197-
MODEL_SETTINGS_FIELD,
198-
true,
199-
() -> null,
200-
(n, c, o) -> SemanticTextField.parseModelSettingsFromMap(o),
201-
mapper -> ((SemanticTextFieldType) mapper.fieldType()).modelSettings,
202-
XContentBuilder::field,
203-
Objects::toString
204-
).acceptsNull().setMergeValidator(SemanticTextFieldMapper::canMergeModelSettings);
187+
private final Parameter<MinimalServiceSettings> modelSettings;
205188

206-
private final Parameter<SemanticTextIndexOptions> indexOptions = new Parameter<>(
207-
INDEX_OPTIONS_FIELD,
208-
true,
209-
() -> null,
210-
(n, c, o) -> parseIndexOptionsFromMap(n, o, c.indexVersionCreated()),
211-
mapper -> ((SemanticTextFieldType) mapper.fieldType()).indexOptions,
212-
XContentBuilder::field,
213-
Objects::toString
214-
).acceptsNull();
189+
private final Parameter<SemanticTextIndexOptions> indexOptions;
215190

216191
@SuppressWarnings("unchecked")
217192
private final Parameter<ChunkingSettings> chunkingSettings = new Parameter<>(
@@ -248,6 +223,50 @@ public Builder(
248223
super(name);
249224
this.modelRegistry = modelRegistry;
250225
this.useLegacyFormat = InferenceMetadataFieldsMapper.isEnabled(indexSettings.getSettings()) == false;
226+
this.indexVersionCreated = indexSettings.getIndexVersionCreated();
227+
228+
this.inferenceId = Parameter.stringParam(
229+
INFERENCE_ID_FIELD,
230+
false,
231+
mapper -> ((SemanticTextFieldType) mapper.fieldType()).inferenceId,
232+
DEFAULT_ELSER_2_INFERENCE_ID
233+
).addValidator(v -> {
234+
if (Strings.isEmpty(v)) {
235+
throw new IllegalArgumentException(
236+
"[" + INFERENCE_ID_FIELD + "] on mapper [" + leafName() + "] of type [" + CONTENT_TYPE + "] must not be empty"
237+
);
238+
}
239+
}).alwaysSerialize();
240+
241+
this.modelSettings = new Parameter<>(
242+
MODEL_SETTINGS_FIELD,
243+
true,
244+
() -> null,
245+
(n, c, o) -> SemanticTextField.parseModelSettingsFromMap(o),
246+
mapper -> ((SemanticTextFieldType) mapper.fieldType()).modelSettings,
247+
XContentBuilder::field,
248+
Objects::toString
249+
).acceptsNull().setMergeValidator(SemanticTextFieldMapper::canMergeModelSettings);
250+
251+
this.indexOptions = new Parameter<>(
252+
INDEX_OPTIONS_FIELD,
253+
true,
254+
() -> null,
255+
(n, c, o) -> parseIndexOptionsFromMap(n, o, c.indexVersionCreated()),
256+
mapper -> ((SemanticTextFieldType) mapper.fieldType()).indexOptions,
257+
(b, n, v) -> {
258+
if (v == null) {
259+
MinimalServiceSettings resolvedModelSettings = modelSettings.get() != null
260+
? modelSettings.get()
261+
: modelRegistry.getMinimalServiceSettings(inferenceId.get());
262+
b.field(INDEX_OPTIONS_FIELD, defaultIndexOptions(indexVersionCreated, resolvedModelSettings));
263+
} else {
264+
b.field(INDEX_OPTIONS_FIELD, v);
265+
}
266+
},
267+
Objects::toString
268+
).acceptsNull().setSerializerCheck(this::indexOptionsSerializerCheck);
269+
251270
this.inferenceFieldBuilder = c -> {
252271
// Resolve the model setting from the registry if it has not been set yet.
253272
var resolvedModelSettings = modelSettings.get() != null ? modelSettings.get() : getResolvedModelSettings(c, false);
@@ -263,6 +282,17 @@ public Builder(
263282
};
264283
}
265284

285+
/**
 * Serializer check for the {@code index_options} parameter; registered on that parameter via
 * {@code setSerializerCheck(this::indexOptionsSerializerCheck)} in the builder constructor.
 *
 * @param includeDefaults flag supplied by the parameter framework (presumably "defaults were requested" - confirm against Parameter)
 * @param isConfigured    flag supplied by the parameter framework (presumably "value was explicitly set" - confirm against Parameter)
 * @param value           the current index options value, may be {@code null}
 * @return when {@code includeDefaults} is set and {@code value} is {@code null}: whether
 *         {@code defaultIndexOptions} yields a non-null default for the resolved model settings;
 *         otherwise {@code isConfigured}
 */
private boolean indexOptionsSerializerCheck(boolean includeDefaults, boolean isConfigured, SemanticTextIndexOptions value) {
286+
if (includeDefaults && value == null) {
287+
// Use the model settings held by this builder when present, otherwise look them up in the
// model registry by the configured inference id.
MinimalServiceSettings resolvedModelSettings = modelSettings.get() != null
288+
? modelSettings.get()
289+
: modelRegistry.getMinimalServiceSettings(inferenceId.get());
290+
// Only report "serialize" when a default can actually be computed for these settings.
return defaultIndexOptions(indexVersionCreated, resolvedModelSettings) != null;
291+
} else {
292+
return isConfigured;
293+
}
294+
}
295+
266296
public Builder setInferenceId(String id) {
267297
this.inferenceId.setValue(id);
268298
return this;
@@ -365,8 +395,9 @@ public SemanticTextFieldMapper build(MapperBuilderContext context) {
365395
validateServiceSettings(modelSettings.get(), resolvedModelSettings);
366396
}
367397

368-
if (context.getMergeReason() != MapperService.MergeReason.MAPPING_RECOVERY && indexOptions.get() != null) {
369-
validateIndexOptions(indexOptions.get(), inferenceId.getValue(), resolvedModelSettings);
398+
SemanticTextIndexOptions builderIndexOptions = indexOptions.get();
399+
if (context.getMergeReason() != MapperService.MergeReason.MAPPING_RECOVERY && builderIndexOptions != null) {
400+
validateIndexOptions(builderIndexOptions, inferenceId.getValue(), resolvedModelSettings);
370401
}
371402

372403
final String fullName = context.buildFullName(leafName());
@@ -1208,7 +1239,6 @@ static DenseVectorFieldMapper.DenseVectorIndexOptions defaultDenseVectorIndexOpt
12081239
// As embedding models for text perform better with BBQ, we aggressively default semantic_text fields to use optimized index
12091240
// options
12101241
if (indexVersionDefaultsToBbqHnsw(indexVersionCreated)) {
1211-
12121242
DenseVectorFieldMapper.DenseVectorIndexOptions defaultBbqHnswIndexOptions = defaultBbqHnswDenseVectorIndexOptions();
12131243
return defaultBbqHnswIndexOptions.validate(modelSettings.elementType(), modelSettings.dimensions(), false)
12141244
? defaultBbqHnswIndexOptions
@@ -1230,11 +1260,24 @@ static DenseVectorFieldMapper.DenseVectorIndexOptions defaultBbqHnswDenseVectorI
12301260
return new DenseVectorFieldMapper.BBQHnswIndexOptions(m, efConstruction, rescoreVector);
12311261
}
12321262

1233-
static SemanticTextIndexOptions defaultBbqHnswSemanticTextIndexOptions() {
1234-
return new SemanticTextIndexOptions(
1235-
SemanticTextIndexOptions.SupportedIndexOptions.DENSE_VECTOR,
1236-
defaultBbqHnswDenseVectorIndexOptions()
1237-
);
1263+
/**
 * Computes the default {@link SemanticTextIndexOptions} for the given model settings.
 *
 * @param indexVersionCreated index creation version, forwarded to {@code defaultDenseVectorIndexOptions}
 * @param modelSettings       model settings to derive defaults from, may be {@code null}
 * @return dense-vector index options wrapped as {@code SemanticTextIndexOptions} when the task type is
 *         {@code TEXT_EMBEDDING} and {@code defaultDenseVectorIndexOptions} returns a non-null value;
 *         {@code null} when {@code modelSettings} is {@code null}, when the task type is anything else,
 *         or when no dense-vector default is available
 */
static SemanticTextIndexOptions defaultIndexOptions(IndexVersion indexVersionCreated, MinimalServiceSettings modelSettings) {
1264+
1265+
// Without model settings there is nothing to derive a default from.
if (modelSettings == null) {
1266+
return null;
1267+
}
1268+
1269+
SemanticTextIndexOptions defaultIndexOptions = null;
1270+
// Only text-embedding models get a default here; every other task type falls through to null.
if (modelSettings.taskType() == TaskType.TEXT_EMBEDDING) {
1271+
DenseVectorFieldMapper.DenseVectorIndexOptions denseVectorIndexOptions = defaultDenseVectorIndexOptions(
1272+
indexVersionCreated,
1273+
modelSettings
1274+
);
1275+
// Propagate a null dense-vector default as null rather than wrapping it.
defaultIndexOptions = denseVectorIndexOptions == null
1276+
? null
1277+
: new SemanticTextIndexOptions(SemanticTextIndexOptions.SupportedIndexOptions.DENSE_VECTOR, denseVectorIndexOptions);
1278+
}
1279+
1280+
return defaultIndexOptions;
12381281
}
12391282

12401283
private static boolean canMergeModelSettings(MinimalServiceSettings previous, MinimalServiceSettings current, Conflicts conflicts) {

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextIndexOptions.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import java.util.Arrays;
2121
import java.util.Locale;
2222
import java.util.Map;
23+
import java.util.Objects;
2324

2425
/**
2526
* Represents index options for a semantic_text field.
@@ -50,6 +51,18 @@ public IndexOptions indexOptions() {
5051
return indexOptions;
5152
}
5253

54+
/**
 * Two {@code SemanticTextIndexOptions} are equal when their {@code type} fields are the same and their
 * {@code indexOptions} fields are equal according to {@link Objects#equals(Object, Object)}.
 *
 * @param o the object to compare with; {@code null} or any other class yields {@code false}
 * @return {@code true} if {@code o} is a {@code SemanticTextIndexOptions} with matching type and index options
 */
@Override
55+
public boolean equals(Object o) {
56+
// instanceof is false for null, so this also rejects a null argument.
if (o instanceof SemanticTextIndexOptions == false) return false;
57+
SemanticTextIndexOptions that = (SemanticTextIndexOptions) o;
58+
// NOTE(review): type is compared by reference - presumably the SupportedIndexOptions enum; confirm.
return type == that.type && Objects.equals(indexOptions, that.indexOptions);
59+
}
60+
61+
/**
 * Hash code derived from {@code type} and {@code indexOptions}, the same two fields compared by
 * {@code equals}.
 *
 * @return the combined hash of {@code type} and {@code indexOptions}
 */
@Override
62+
public int hashCode() {
63+
return Objects.hash(type, indexOptions);
64+
}
65+
5366
public enum SupportedIndexOptions {
5467
DENSE_VECTOR("dense_vector") {
5568
@Override

0 commit comments

Comments
 (0)