From 1312cd6fdec87e26c53be19e10ab16caa8b3456e Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 2 Oct 2025 14:31:36 -0400 Subject: [PATCH 1/3] Defer Semantic Text Failures on Pre-8.11 Indices (#135845) (cherry picked from commit 66d924132c186333ec4f2232b170d0437974af58) # Conflicts: # server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java # x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java --- docs/changelog/135845.yaml | 5 + .../vectors/DenseVectorFieldMapperTests.java | 3 +- .../mapper/SemanticTextFieldMapper.java | 54 +++++-- .../mapper/SemanticTextFieldMapperTests.java | 144 +++++++++++------- 4 files changed, 139 insertions(+), 67 deletions(-) create mode 100644 docs/changelog/135845.yaml diff --git a/docs/changelog/135845.yaml b/docs/changelog/135845.yaml new file mode 100644 index 0000000000000..032a01a5b3542 --- /dev/null +++ b/docs/changelog/135845.yaml @@ -0,0 +1,5 @@ +pr: 135845 +summary: Fix for creating semantic_text fields on pre-8.11 indices crashing Elasticsearch +area: Mapping +type: bug +issues: [] diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index b9dcb88189bec..8f17dfa8fd56e 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -68,7 +68,6 @@ import static org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase.randomNormalizedVector; import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.DYNAMIC_NPROBE; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE; -import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION; import static 
org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.IVF_FORMAT; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -108,7 +107,7 @@ private void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws I if (elementType != ElementType.FLOAT) { b.field("element_type", elementType.toString()); } - if (indexVersion.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) { + if (indexVersion.onOrAfter(DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) { // Serialize if it's new index version, or it was not the default for previous indices b.field("index", indexed); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index be0349c11f402..ee6a7c909b238 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -125,7 +125,7 @@ */ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper { private static final Logger logger = LogManager.getLogger(SemanticTextFieldMapper.class); - public static final String UNSUPPORTED_INDEX_MESSAGE = "[semantic_text] is available on indices created with 8.11 or higher."; + public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix"); public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix"); public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix"); @@ -145,6 +145,12 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie public static final String CONTENT_TYPE = 
"semantic_text"; public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID; + public static final String UNSUPPORTED_INDEX_MESSAGE = "[" + + CONTENT_TYPE + + "] is available on indices created with 8.11 or higher. Please create a new index to use [" + + CONTENT_TYPE + + "]"; + public static final float DEFAULT_RESCORE_OVERSAMPLE = 3.0f; static final String INDEX_OPTIONS_FIELD = "index_options"; @@ -158,9 +164,6 @@ public static final TypeParser parser(Supplier modelRegistry) { public static BiConsumer validateParserContext(String type) { return (n, c) -> { - if (c.getIndexSettings().getIndexVersionCreated().before(NEW_SPARSE_VECTOR)) { - throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE); - } if (InferenceMetadataFieldsMapper.isEnabled(c.getIndexSettings().getSettings()) == false) { notInMultiFields(type).accept(n, c); } @@ -548,16 +551,33 @@ SemanticTextField parseSemanticTextField(DocumentParserContext context) throws I if (parser.currentToken() == XContentParser.Token.VALUE_NULL) { return null; } + + SemanticTextField semanticTextField; boolean isWithinLeaf = context.path().isWithinLeafObject(); try { context.path().setWithinLeafObject(true); - return SemanticTextField.parse( + semanticTextField = SemanticTextField.parse( context.parser(), new SemanticTextField.ParserContext(fieldType().useLegacyFormat, fullPath(), context.parser().contentType()) ); } finally { context.path().setWithinLeafObject(isWithinLeaf); } + + IndexVersion indexCreatedVersion = context.indexSettings().getIndexVersionCreated(); + if (semanticTextField != null + && semanticTextField.inference().modelSettings() != null + && indexCreatedVersion.before(NEW_SPARSE_VECTOR)) { + // Explicitly fail to parse semantic text fields that meet the following criteria: + // - Are in pre 8.11 indices + // - Have model settings, indicating that they have embeddings to be indexed + // + // We can't fail earlier than this because it causes pre 8.11 indices with semantic text 
fields to either be in red state or + // cause Elasticsearch to not launch. + throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE); + } + + return semanticTextField; } void parseCreateFieldFromContext(DocumentParserContext context, SemanticTextField field, XContentLocation xContentLocation) @@ -1158,17 +1178,23 @@ private static Mapper.Builder createEmbeddingsField( indexVersionCreated ); - SimilarityMeasure similarity = modelSettings.similarity(); - if (similarity != null) { - switch (similarity) { - case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE); - case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT); - case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM); - default -> throw new IllegalArgumentException( - "Unknown similarity measure in model_settings [" + similarity.name() + "]" - ); + // Skip setting similarity on pre 8.11 indices. It causes dense vector field creation to fail because similarity can only be set + // on indexed fields, which is not done by default prior to 8.11. The fact that the dense vector field is partially configured is + // moot because we will explicitly fail to index docs into this semantic text field anyways. 
+ if (indexVersionCreated.onOrAfter(NEW_SPARSE_VECTOR)) { + SimilarityMeasure similarity = modelSettings.similarity(); + if (similarity != null) { + switch (similarity) { + case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE); + case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT); + case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM); + default -> throw new IllegalArgumentException( + "Unknown similarity measure in model_settings [" + similarity.name() + "]" + ); + } } } + denseVectorMapperBuilder.dimensions(modelSettings.dimensions()); denseVectorMapperBuilder.elementType(modelSettings.elementType()); if (indexOptions != null) { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java index 4ff4c79be7d28..27aed25904c2b 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java @@ -403,57 +403,6 @@ public void testInvalidTaskTypes() { } } - @Override - protected IndexVersion boostNotAllowedIndexVersion() { - return IndexVersions.NEW_SPARSE_VECTOR; - } - - public void testOldIndexSemanticTextDenseVectorRaisesError() throws IOException { - final String fieldName = "field"; - final XContentBuilder fieldMapping = fieldMapping(b -> { - b.field("type", "semantic_text"); - b.field(INFERENCE_ID_FIELD, "test_inference_id"); - b.startObject("model_settings"); - b.field("task_type", "text_embedding"); - b.field("dimensions", 384); - b.field("similarity", "cosine"); - b.field("element_type", "float"); - b.endObject(); - }); - 
assertOldIndexUnsupported(fieldMapping); - } - - public void testOldIndexSemanticTextMinimalMappingRaisesError() throws IOException { - final XContentBuilder fieldMapping = fieldMapping(this::minimalMapping); - assertOldIndexUnsupported(fieldMapping); - } - - public void testOldIndexSemanticTextSparseVersionRaisesError() throws IOException { - final XContentBuilder fieldMapping = fieldMapping(b -> { - b.field("type", "semantic_text"); - b.field("inference_id", "another_inference_id"); - b.startObject("model_settings"); - b.field("task_type", "sparse_embedding"); - b.endObject(); - }); - assertOldIndexUnsupported(fieldMapping); - } - - private void assertOldIndexUnsupported(XContentBuilder fieldMapping) { - - MapperParsingException exception = assertThrows( - MapperParsingException.class, - () -> createMapperService( - fieldMapping, - true, - IndexVersions.V_8_0_0, - IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR) - ) - ); - assertTrue(exception.getMessage().contains(UNSUPPORTED_INDEX_MESSAGE)); - assertTrue(exception.getRootCause() instanceof UnsupportedOperationException); - } - public void testMultiFieldsSupport() throws IOException { if (useLegacyFormat) { Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { @@ -1134,6 +1083,99 @@ public void testModelSettingsRequiredWithChunks() throws IOException { assertThat(ex.getMessage(), containsString("[model_settings] must be set for field [field] when chunks are provided")); } + public void testPre811IndexSemanticTextDenseVectorRaisesError() throws IOException { + Model model = TestModel.createRandomInstance(TaskType.TEXT_EMBEDDING); + String fieldName = randomAlphaOfLength(8); + + MapperService mapperService = createMapperService( + mapping( + b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", model.getInferenceEntityId()).endObject() + ), + true, + IndexVersions.V_8_0_0, + 
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR) + ); + assertSemanticTextField(mapperService, fieldName, false, null, null); + + merge( + mapperService, + mapping( + b -> b.startObject(fieldName) + .field("type", "semantic_text") + .field("inference_id", model.getInferenceEntityId()) + .startObject("model_settings") + .field("task_type", TaskType.TEXT_EMBEDDING.toString()) + .field("dimensions", model.getServiceSettings().dimensions()) + .field("similarity", model.getServiceSettings().similarity()) + .field("element_type", model.getServiceSettings().elementType()) + .endObject() + .endObject() + ) + ); + assertSemanticTextField(mapperService, fieldName, true, null, null); + + DocumentMapper documentMapper = mapperService.documentMapper(); + DocumentParsingException e = assertThrows( + DocumentParsingException.class, + () -> documentMapper.parse( + source( + b -> addSemanticTextInferenceResults( + true, + b, + List.of(randomSemanticText(true, fieldName, model, null, List.of("foo", "bar"), XContentType.JSON)) + ) + ) + ) + ); + assertThat(e.getCause(), instanceOf(UnsupportedOperationException.class)); + assertThat(e.getCause().getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE)); + } + + public void testPre811IndexSemanticTextSparseVectorRaisesError() throws IOException { + Model model = TestModel.createRandomInstance(TaskType.SPARSE_EMBEDDING); + String fieldName = randomAlphaOfLength(8); + + MapperService mapperService = createMapperService( + mapping( + b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", model.getInferenceEntityId()).endObject() + ), + true, + IndexVersions.V_8_0_0, + IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR) + ); + assertSemanticTextField(mapperService, fieldName, false, null, null); + + merge( + mapperService, + mapping( + b -> b.startObject(fieldName) + .field("type", "semantic_text") + .field("inference_id", model.getInferenceEntityId()) + 
.startObject("model_settings") + .field("task_type", TaskType.SPARSE_EMBEDDING.toString()) + .endObject() + .endObject() + ) + ); + assertSemanticTextField(mapperService, fieldName, true, null, null); + + DocumentMapper documentMapper = mapperService.documentMapper(); + DocumentParsingException e = assertThrows( + DocumentParsingException.class, + () -> documentMapper.parse( + source( + b -> addSemanticTextInferenceResults( + true, + b, + List.of(randomSemanticText(true, fieldName, model, null, List.of("foo", "bar"), XContentType.JSON)) + ) + ) + ) + ); + assertThat(e.getCause(), instanceOf(UnsupportedOperationException.class)); + assertThat(e.getCause().getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE)); + } + private MapperService mapperServiceForFieldWithModelSettings(String fieldName, String inferenceId, MinimalServiceSettings modelSettings) throws IOException { return mapperServiceForFieldWithModelSettings(fieldName, inferenceId, null, modelSettings); From 2483653c0764110904a08daeb05946320bacf96d Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 2 Oct 2025 19:01:06 +0000 Subject: [PATCH 2/3] [CI] Auto commit changes from spotless --- .../xpack/inference/mapper/SemanticTextFieldMapper.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index ee6a7c909b238..243ac32c786f2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -1178,8 +1178,10 @@ private static Mapper.Builder createEmbeddingsField( indexVersionCreated ); - // Skip setting similarity on pre 8.11 indices. 
It causes dense vector field creation to fail because similarity can only be set - // on indexed fields, which is not done by default prior to 8.11. The fact that the dense vector field is partially configured is + // Skip setting similarity on pre 8.11 indices. It causes dense vector field creation to fail because similarity can only be + // set + // on indexed fields, which is not done by default prior to 8.11. The fact that the dense vector field is partially + // configured is // moot because we will explicitly fail to index docs into this semantic text field anyways. if (indexVersionCreated.onOrAfter(NEW_SPARSE_VECTOR)) { SimilarityMeasure similarity = modelSettings.similarity(); From eea2341a2b5de591c3ce188ca799f095c182caf8 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 2 Oct 2025 15:21:40 -0400 Subject: [PATCH 3/3] Update comment --- .../xpack/inference/mapper/SemanticTextFieldMapper.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 243ac32c786f2..b718493a37790 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -1179,10 +1179,8 @@ private static Mapper.Builder createEmbeddingsField( ); // Skip setting similarity on pre 8.11 indices. It causes dense vector field creation to fail because similarity can only be - // set - // on indexed fields, which is not done by default prior to 8.11. The fact that the dense vector field is partially - // configured is - // moot because we will explicitly fail to index docs into this semantic text field anyways. + // set on indexed fields, which is not done by default prior to 8.11. 
The fact that the dense vector field is partially + // configured is moot because we will explicitly fail to index docs into this semantic text field anyway. if (indexVersionCreated.onOrAfter(NEW_SPARSE_VECTOR)) { SimilarityMeasure similarity = modelSettings.similarity(); if (similarity != null) {