Skip to content

Commit 9912426

Browse files
committed
Always emit index options, even when using defaults
1 parent f9127eb commit 9912426

File tree

1 file changed

+82
-33
lines changed

1 file changed

+82
-33
lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 82 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ public static class Builder extends FieldMapper.Builder {
224224

225225
private MinimalServiceSettings resolvedModelSettings;
226226
private Function<MapperBuilderContext, ObjectMapper> inferenceFieldBuilder;
227+
private final IndexVersion indexVersionCreated;
227228

228229
public static Builder from(SemanticTextFieldMapper mapper) {
229230
Builder builder = new Builder(
@@ -245,9 +246,10 @@ public Builder(
245246
super(name);
246247
this.modelRegistry = modelRegistry;
247248
this.useLegacyFormat = InferenceMetadataFieldsMapper.isEnabled(indexSettings.getSettings()) == false;
249+
this.indexVersionCreated = indexSettings.getIndexVersionCreated();
248250
this.inferenceFieldBuilder = c -> createInferenceField(
249251
c,
250-
indexSettings.getIndexVersionCreated(),
252+
indexVersionCreated,
251253
useLegacyFormat,
252254
resolvedModelSettings,
253255
indexOptions.get(),
@@ -303,7 +305,8 @@ public SemanticTextFieldMapper build(MapperBuilderContext context) {
303305
throw new IllegalArgumentException(CONTENT_TYPE + " field [" + leafName() + "] does not support multi-fields");
304306
}
305307

306-
if (context.getMergeReason() != MapperService.MergeReason.MAPPING_RECOVERY && modelSettings.get() == null) {
308+
resolvedModelSettings = modelSettings.get();
309+
if (context.getMergeReason() != MapperService.MergeReason.MAPPING_RECOVERY && resolvedModelSettings == null) {
307310
try {
308311
/*
309312
* If the model is not already set and we are not in a recovery scenario, resolve it using the registry.
@@ -326,17 +329,35 @@ public SemanticTextFieldMapper build(MapperBuilderContext context) {
326329
inferenceId.get()
327330
);
328331
}
329-
} else {
330-
resolvedModelSettings = modelSettings.get();
331332
}
332333

333-
if (modelSettings.get() != null) {
334-
validateServiceSettings(modelSettings.get(), resolvedModelSettings);
334+
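// TODO: delete once confirmed redundant - resolvedModelSettings is now seeded from modelSettings.get() above, so this check appears to compare the settings with themselves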
335+
// if (modelSettings.get() != null) {
336+
// validateServiceSettings(modelSettings.get(), resolvedModelSettings);
337+
// }
338+
339+
// Validate any specified index options against existing or default index options
340+
SemanticTextIndexOptions resolvedIndexOptions = indexOptions.getValue();
341+
if (context.getMergeReason() != MapperService.MergeReason.MAPPING_RECOVERY) {
342+
if (indexOptions.get() != null) {
343+
// We've specified index options in this request, so we need to validate that they're compatible with our model
344+
validateIndexOptions(indexOptions.get(), inferenceId.getValue(), resolvedModelSettings);
345+
} else if (resolvedModelSettings != null) {
346+
// If we know enough about the model to specify index options, ensure we capture the correct defaults
347+
DenseVectorFieldMapper.DenseVectorIndexOptions defaultIndexOptions = defaultDenseVectorIndexOptions(
348+
indexVersionCreated,
349+
resolvedModelSettings
350+
);
351+
if (defaultIndexOptions != null) {
352+
resolvedIndexOptions = new SemanticTextIndexOptions(
353+
SemanticTextIndexOptions.SupportedIndexOptions.DENSE_VECTOR,
354+
defaultIndexOptions
355+
);
356+
validateIndexOptions(resolvedIndexOptions, inferenceId.getValue(), resolvedModelSettings);
357+
}
358+
}
335359
}
336360

337-
if (context.getMergeReason() != MapperService.MergeReason.MAPPING_RECOVERY && indexOptions.get() != null) {
338-
validateIndexOptions(indexOptions.get(), inferenceId.getValue(), resolvedModelSettings);
339-
}
340361
final String fullName = context.buildFullName(leafName());
341362

342363
if (context.isInNestedContext()) {
@@ -353,7 +374,7 @@ public SemanticTextFieldMapper build(MapperBuilderContext context) {
353374
searchInferenceId.getValue(),
354375
modelSettings.getValue(),
355376
chunkingSettings.getValue(),
356-
indexOptions.getValue(),
377+
resolvedIndexOptions,
357378
inferenceField,
358379
useLegacyFormat,
359380
meta.getValue()
@@ -1143,31 +1164,21 @@ private static Mapper.Builder createEmbeddingsField(
11431164
denseVectorMapperBuilder.indexOptions(denseVectorIndexOptions);
11441165
denseVectorIndexOptions.validate(modelSettings.elementType(), modelSettings.dimensions(), true);
11451166
} else {
1146-
DenseVectorFieldMapper.DenseVectorIndexOptions defaultIndexOptions = null;
1147-
if (indexVersionCreated.onOrAfter(SEMANTIC_TEXT_DEFAULTS_TO_BBQ)
1148-
|| indexVersionCreated.between(
1149-
SEMANTIC_TEXT_DEFAULTS_TO_BBQ_BACKPORT_8_X,
1150-
IndexVersions.UPGRADE_TO_LUCENE_10_0_0
1151-
)) {
1152-
defaultIndexOptions = defaultDenseVectorIndexOptions();
1153-
}
1154-
if (defaultIndexOptions != null
1155-
&& defaultIndexOptions.validate(modelSettings.elementType(), modelSettings.dimensions(), false)) {
1167+
DenseVectorFieldMapper.DenseVectorIndexOptions defaultIndexOptions = defaultDenseVectorIndexOptions(
1168+
indexVersionCreated,
1169+
modelSettings
1170+
);
1171+
if (defaultIndexOptions != null) {
11561172
denseVectorMapperBuilder.indexOptions(defaultIndexOptions);
11571173
}
11581174
}
11591175

11601176
boolean hasUserSpecifiedIndexOptions = indexOptions != null;
11611177
DenseVectorFieldMapper.DenseVectorIndexOptions denseVectorIndexOptions = hasUserSpecifiedIndexOptions
11621178
? (DenseVectorFieldMapper.DenseVectorIndexOptions) indexOptions.indexOptions()
1163-
: (indexVersionCreated.onOrAfter(SEMANTIC_TEXT_DEFAULTS_TO_BBQ) ? defaultDenseVectorIndexOptions() : null);
1164-
1165-
if (denseVectorIndexOptions != null
1166-
&& denseVectorIndexOptions.validate(
1167-
modelSettings.elementType(),
1168-
modelSettings.dimensions(),
1169-
hasUserSpecifiedIndexOptions
1170-
)) {
1179+
: defaultDenseVectorIndexOptions(indexVersionCreated, modelSettings);
1180+
1181+
if (denseVectorIndexOptions != null) {
11711182
denseVectorMapperBuilder.indexOptions(denseVectorIndexOptions);
11721183
}
11731184

@@ -1177,17 +1188,55 @@ private static Mapper.Builder createEmbeddingsField(
11771188
};
11781189
}
11791190

1180-
static DenseVectorFieldMapper.DenseVectorIndexOptions defaultDenseVectorIndexOptions() {
1191+
/**
* Picks the default dense vector index options for a semantic_text field.
* <p>
* BBQ HNSW options are tried first when the index was created on or after {@code SEMANTIC_TEXT_DEFAULTS_TO_BBQ}, or when
* {@code indexVersionCreated.between(SEMANTIC_TEXT_DEFAULTS_TO_BBQ_BACKPORT_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0)}
* holds, and are kept only if they validate against the model's element type and dimensions. In every other case the
* legacy defaults are selected instead.
*
* @param indexVersionCreated version the index was created with; decides whether BBQ is attempted
* @param modelSettings supplies the element type and dimensions used for validation; it is dereferenced unconditionally,
*                      so it must not be null
* @return the selected default options, or {@code null} when the selected options fail validation for the model
*/
static DenseVectorFieldMapper.DenseVectorIndexOptions defaultDenseVectorIndexOptions(
1192+
IndexVersion indexVersionCreated,
1193+
MinimalServiceSettings modelSettings
1194+
) {
1195+
DenseVectorFieldMapper.DenseVectorIndexOptions defaultIndexOptions = null;
1196+
11811197
// As embedding models for text perform better with BBQ, we aggressively default semantic_text fields to use optimized index
1182-
// options outside of dense_vector defaults
1198+
// options
1199+
if (indexVersionCreated.onOrAfter(SEMANTIC_TEXT_DEFAULTS_TO_BBQ)
1200+
|| indexVersionCreated.between(SEMANTIC_TEXT_DEFAULTS_TO_BBQ_BACKPORT_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0)) {
1201+
DenseVectorFieldMapper.DenseVectorIndexOptions defaultBbqHnswIndexOptions = defaultBbqHnswDenseVectorIndexOptions();
1202+
defaultIndexOptions = defaultBbqHnswIndexOptions.validate(modelSettings.elementType(), modelSettings.dimensions(), false)
1203+
? defaultBbqHnswIndexOptions
1204+
: null;
1205+
}
1206+
1207+
// Older indices or those incompatible with BBQ will continue to use legacy defaults
1208+
if (defaultIndexOptions == null) {
1209+
defaultIndexOptions = legacyDefaultDenseVectorIndexOptions();
1210+
}
1211+
1212+
// NOTE(review): when the BBQ options were kept above they have already passed this same validate(...) call, so this
// re-check only adds new information for the legacy fallback - consider validating each candidate exactly once.
return defaultIndexOptions.validate(modelSettings.elementType(), modelSettings.dimensions(), false) ? defaultIndexOptions : null;
1213+
}
1214+
1215+
/**
* Builds the BBQ HNSW index options that semantic_text tries first as its dense vector default.
* <p>
* The options use {@code Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN} for {@code m},
* {@code Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH} for {@code efConstruction}, and a rescore vector created from
* {@code DEFAULT_RESCORE_OVERSAMPLE}.
*
* @return a new {@code BBQHnswIndexOptions} instance
*/
static DenseVectorFieldMapper.DenseVectorIndexOptions defaultBbqHnswDenseVectorIndexOptions() {
11831216
int m = Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
11841217
int efConstruction = Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
11851218
DenseVectorFieldMapper.RescoreVector rescoreVector = new DenseVectorFieldMapper.RescoreVector(DEFAULT_RESCORE_OVERSAMPLE);
11861219
return new DenseVectorFieldMapper.BBQHnswIndexOptions(m, efConstruction, rescoreVector);
11871220
}
11881221

1189-
static SemanticTextIndexOptions defaultSemanticDenseIndexOptions() {
1190-
return new SemanticTextIndexOptions(SemanticTextIndexOptions.SupportedIndexOptions.DENSE_VECTOR, defaultDenseVectorIndexOptions());
1222+
/**
1223+
* These are the default index options for dense vector fields that were used before semantic_text defaulted to BBQ,
1224+
* and are still used for models that are incompatible with BBQ.
* <p>
* They are built as {@code Int8HnswIndexOptions} with {@code Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN} for {@code m} and
* {@code Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH} for {@code efConstruction}; the two remaining constructor
* arguments are passed as {@code null}.
*
* @return a new {@code Int8HnswIndexOptions} instance
1225+
*/
1226+
private static DenseVectorFieldMapper.DenseVectorIndexOptions legacyDefaultDenseVectorIndexOptions() {
1227+
int m = Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
1228+
int efConstruction = Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
1229+
return new DenseVectorFieldMapper.Int8HnswIndexOptions(m, efConstruction, null, null);
1230+
}
1231+
1232+
/**
* Wraps the default dense vector index options for the given index version and model in a
* {@code SemanticTextIndexOptions} of type {@code DENSE_VECTOR}.
* <p>
* NOTE(review): {@code defaultDenseVectorIndexOptions(...)} returns {@code null} when its selected options fail
* validation, and that value is passed straight into the constructor here - confirm {@code SemanticTextIndexOptions}
* tolerates null options, or guard for it as the caller in {@code build(...)} does.
*
* @param indexVersionCreated version the index was created with, forwarded to {@code defaultDenseVectorIndexOptions}
* @param modelSettings model settings forwarded to {@code defaultDenseVectorIndexOptions}
* @return a new {@code SemanticTextIndexOptions} holding the resolved defaults
*/
static SemanticTextIndexOptions defaultSemanticDenseIndexOptions(
1233+
IndexVersion indexVersionCreated,
1234+
MinimalServiceSettings modelSettings
1235+
) {
1236+
return new SemanticTextIndexOptions(
1237+
SemanticTextIndexOptions.SupportedIndexOptions.DENSE_VECTOR,
1238+
defaultDenseVectorIndexOptions(indexVersionCreated, modelSettings)
1239+
);
11911240
}
11921241

11931242
private static boolean canMergeModelSettings(MinimalServiceSettings previous, MinimalServiceSettings current, Conflicts conflicts) {
@@ -1202,7 +1251,7 @@ private static boolean canMergeModelSettings(MinimalServiceSettings previous, Mi
12021251
}
12031252

12041253
private static boolean canMergeIndexOptions(SemanticTextIndexOptions previous, SemanticTextIndexOptions current, Conflicts conflicts) {
1205-
if (Objects.equals(previous, current) || (previous == null && current == null)) {
1254+
if (Objects.equals(previous, current)) {
12061255
return true;
12071256
}
12081257

0 commit comments

Comments
 (0)