Skip to content

Commit c4f7b97

Browse files
authored
Fix model settings propagation when merging semantic text fields (#129438)
Ensure that model settings are correctly set during mapping merges. While this is not an issue currently, since the underlying embedding field is not customizable, this fix is required for correct behavior in #119967.
1 parent b24bb35 commit c4f7b97

File tree

1 file changed

+47
-30
lines changed

1 file changed

+47
-30
lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 47 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,6 @@ public static class Builder extends FieldMapper.Builder {
212212

213213
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
214214

215-
private MinimalServiceSettings resolvedModelSettings;
216215
private Function<MapperBuilderContext, ObjectMapper> inferenceFieldBuilder;
217216

218217
public static Builder from(SemanticTextFieldMapper mapper) {
@@ -235,14 +234,18 @@ public Builder(
235234
super(name);
236235
this.modelRegistry = modelRegistry;
237236
this.useLegacyFormat = InferenceMetadataFieldsMapper.isEnabled(indexSettings.getSettings()) == false;
238-
this.inferenceFieldBuilder = c -> createInferenceField(
239-
c,
240-
indexSettings.getIndexVersionCreated(),
241-
useLegacyFormat,
242-
resolvedModelSettings,
243-
bitSetProducer,
244-
indexSettings
245-
);
237+
this.inferenceFieldBuilder = c -> {
238+
// Resolve the model setting from the registry if it has not been set yet.
239+
var resolvedModelSettings = modelSettings.get() != null ? modelSettings.get() : getResolvedModelSettings(c, false);
240+
return createInferenceField(
241+
c,
242+
indexSettings.getIndexVersionCreated(),
243+
useLegacyFormat,
244+
resolvedModelSettings,
245+
bitSetProducer,
246+
indexSettings
247+
);
248+
};
246249
}
247250

248251
public Builder setInferenceId(String id) {
@@ -283,26 +286,26 @@ protected void merge(FieldMapper mergeWith, Conflicts conflicts, MapperMergeCont
283286
inferenceFieldBuilder = c -> mergedInferenceField;
284287
}
285288

286-
@Override
287-
public SemanticTextFieldMapper build(MapperBuilderContext context) {
288-
if (useLegacyFormat && copyTo.copyToFields().isEmpty() == false) {
289-
throw new IllegalArgumentException(CONTENT_TYPE + " field [" + leafName() + "] does not support [copy_to]");
290-
}
291-
if (useLegacyFormat && multiFieldsBuilder.hasMultiFields()) {
292-
throw new IllegalArgumentException(CONTENT_TYPE + " field [" + leafName() + "] does not support multi-fields");
289+
/**
290+
* Resolves the {@link MinimalServiceSettings} for this field's inference ID from the model registry.
291+
* Returns {@code null} during mapping recovery, or when the inference endpoint is not registered.
292+
*/
293+
private MinimalServiceSettings getResolvedModelSettings(MapperBuilderContext context, boolean logWarning) {
294+
if (context.getMergeReason() == MapperService.MergeReason.MAPPING_RECOVERY) {
295+
// the model registry is not available yet
296+
return null;
293297
}
294-
295-
if (context.getMergeReason() != MapperService.MergeReason.MAPPING_RECOVERY && modelSettings.get() == null) {
296-
try {
297-
/*
298-
* If the model is not already set and we are not in a recovery scenario, resolve it using the registry.
299-
* Note: We do not set the model in the mapping at this stage. Instead, the model will be added through
300-
* a mapping update during the first ingestion.
301-
* This approach allows mappings to reference inference endpoints that may not yet exist.
302-
* The only requirement is that the referenced inference endpoint must be available at the time of ingestion.
303-
*/
304-
resolvedModelSettings = modelRegistry.getMinimalServiceSettings(inferenceId.get());
305-
} catch (ResourceNotFoundException exc) {
298+
try {
299+
/*
300+
* If the model is not already set and we are not in a recovery scenario, resolve it using the registry.
301+
* Note: We do not set the model in the mapping at this stage. Instead, the model will be added through
302+
* a mapping update during the first ingestion.
303+
* This approach allows mappings to reference inference endpoints that may not yet exist.
304+
* The only requirement is that the referenced inference endpoint must be available at the time of ingestion.
305+
*/
306+
return modelRegistry.getMinimalServiceSettings(inferenceId.get());
307+
} catch (ResourceNotFoundException exc) {
308+
if (logWarning) {
306309
/* We allow the inference ID to be unregistered at this point.
307310
* This will delay the creation of sub-fields, so indexing and querying for this field won't work
308311
* until the corresponding inference endpoint is created.
@@ -315,8 +318,22 @@ public SemanticTextFieldMapper build(MapperBuilderContext context) {
315318
inferenceId.get()
316319
);
317320
}
318-
} else {
319-
resolvedModelSettings = modelSettings.get();
321+
return null;
322+
}
323+
}
324+
325+
@Override
326+
public SemanticTextFieldMapper build(MapperBuilderContext context) {
327+
if (useLegacyFormat && copyTo.copyToFields().isEmpty() == false) {
328+
throw new IllegalArgumentException(CONTENT_TYPE + " field [" + leafName() + "] does not support [copy_to]");
329+
}
330+
if (useLegacyFormat && multiFieldsBuilder.hasMultiFields()) {
331+
throw new IllegalArgumentException(CONTENT_TYPE + " field [" + leafName() + "] does not support multi-fields");
332+
}
333+
334+
var resolvedModelSettings = modelSettings.get();
335+
if (modelSettings.get() == null) {
336+
resolvedModelSettings = getResolvedModelSettings(context, true);
320337
}
321338

322339
if (modelSettings.get() != null) {

0 commit comments

Comments
 (0)