From d19ed5d9113b6acd296ddd5752f74764f380b05c Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 2 Oct 2025 08:22:10 -0400 Subject: [PATCH 1/9] Revert "refactor(semantic_text): fail early in pre-8.11 indices (#133080)" This reverts commit 8f41a4bb1f2320a7fbdbb2d88bbe3ca1a537d84d. --- .../vectors/DenseVectorFieldMapperTests.java | 3 +- .../mapper/SemanticTextFieldMapper.java | 5 -- .../mapper/SemanticTextFieldMapperTests.java | 52 ------------------- 3 files changed, 1 insertion(+), 59 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index eed68d4c3ac0c..66b66c62d8e60 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -67,7 +67,6 @@ import static org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase.randomNormalizedVector; import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.DYNAMIC_VISIT_RATIO; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE; -import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; @@ -106,7 +105,7 @@ private void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws I if (elementType != ElementType.FLOAT) { b.field("element_type", elementType.toString()); } - if (indexVersion.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) { + if (indexVersion.onOrAfter(DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) { // Serialize if it's new index version, or it was not the default for previous indices b.field("index", indexed); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index ddebc3938ea6a..8f5db892d9420 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -100,7 +100,6 @@ import java.util.function.Function; import java.util.function.Supplier; -import static org.elasticsearch.index.IndexVersions.NEW_SPARSE_VECTOR; import static org.elasticsearch.index.IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ; import static org.elasticsearch.index.IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ_BACKPORT_8_X; import static org.elasticsearch.inference.TaskType.SPARSE_EMBEDDING; @@ -127,7 +126,6 @@ */ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper { private static final Logger logger = LogManager.getLogger(SemanticTextFieldMapper.class); - public static final String UNSUPPORTED_INDEX_MESSAGE = "[semantic_text] is available on indices created with 8.11 or higher."; public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix"); public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix"); public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix"); @@ -166,9 +164,6 @@ public static final TypeParser parser(Supplier modelRegistry) { public static BiConsumer validateParserContext(String type) { return (n, c) -> { - if (c.getIndexSettings().getIndexVersionCreated().before(NEW_SPARSE_VECTOR)) { - throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE); - } if (InferenceMetadataFieldsMapper.isEnabled(c.getIndexSettings().getSettings()) == false) { notInMultiFields(type).accept(n, c); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java index 838e4576716ff..c43240165920e 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java @@ -111,7 +111,6 @@ import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_ELSER_2_INFERENCE_ID; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_RESCORE_OVERSAMPLE; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.INDEX_OPTIONS_FIELD; -import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.UNSUPPORTED_INDEX_MESSAGE; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.generateRandomChunkingSettings; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.generateRandomChunkingSettingsOtherThan; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.randomSemanticText; @@ -415,57 +414,6 @@ public void testInvalidTaskTypes() { } } - @Override - protected IndexVersion boostNotAllowedIndexVersion() { - return IndexVersions.NEW_SPARSE_VECTOR; - } - - public void testOldIndexSemanticTextDenseVectorRaisesError() throws IOException { - final String fieldName = "field"; - final XContentBuilder fieldMapping = fieldMapping(b -> { - b.field("type", "semantic_text"); - b.field(INFERENCE_ID_FIELD, "test_inference_id"); - b.startObject("model_settings"); - b.field("task_type", "text_embedding"); - b.field("dimensions", 384); - b.field("similarity", "cosine"); - b.field("element_type", "float"); - b.endObject(); - }); - assertOldIndexUnsupported(fieldMapping); - } - - public void testOldIndexSemanticTextMinimalMappingRaisesError() throws IOException { - final XContentBuilder fieldMapping = fieldMapping(this::minimalMapping); - assertOldIndexUnsupported(fieldMapping); - } - - public void testOldIndexSemanticTextSparseVersionRaisesError() throws IOException { - final XContentBuilder fieldMapping = fieldMapping(b -> { - b.field("type", "semantic_text"); - b.field("inference_id", "another_inference_id"); - b.startObject("model_settings"); - b.field("task_type", "sparse_embedding"); - b.endObject(); - }); - assertOldIndexUnsupported(fieldMapping); - } - - private void assertOldIndexUnsupported(XContentBuilder fieldMapping) { - - MapperParsingException exception = assertThrows( - MapperParsingException.class, - () -> createMapperService( - fieldMapping, - true, - IndexVersions.V_8_0_0, - IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR) - ) - ); - assertTrue(exception.getMessage().contains(UNSUPPORTED_INDEX_MESSAGE)); - assertTrue(exception.getRootCause() instanceof UnsupportedOperationException); - } - public void testMultiFieldsSupport() throws IOException { if (useLegacyFormat) { Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { From 270c6688cacebaee2ee1c87c748b4424e115bb3e Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 2 Oct 2025 08:48:41 -0400 Subject: [PATCH 2/9] Throw error on first document ingest --- .../inference/mapper/SemanticTextFieldMapper.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 8f5db892d9420..f3453700b6254 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -126,6 +126,7 @@ */ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper { private static final Logger logger = LogManager.getLogger(SemanticTextFieldMapper.class); + public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix"); public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix"); public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix"); @@ -151,6 +152,12 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie public static final String CONTENT_TYPE = "semantic_text"; public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID; + public static final String UNSUPPORTED_INDEX_MESSAGE = "[" + + CONTENT_TYPE + + "] is available on indices created with 8.11 or higher. Please create a new index to use [" + + CONTENT_TYPE + + "]"; + public static final float DEFAULT_RESCORE_OVERSAMPLE = 3.0f; static final String INDEX_OPTIONS_FIELD = "index_options"; @@ -1243,6 +1250,10 @@ private static Mapper.Builder createEmbeddingsField( SemanticTextIndexOptions indexOptions, boolean useLegacyFormat ) { + if (indexVersionCreated.before(IndexVersions.NEW_SPARSE_VECTOR)) { + throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE); + } + return switch (modelSettings.taskType()) { case SPARSE_EMBEDDING -> { SparseVectorFieldMapper.Builder sparseVectorMapperBuilder = new SparseVectorFieldMapper.Builder( From 89a456b789199cf3cac13159dfcb66efcef9c98f Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 2 Oct 2025 09:08:10 -0400 Subject: [PATCH 3/9] Added tests --- .../mapper/SemanticTextFieldMapperTests.java | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java index c43240165920e..fd6834a100184 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java @@ -111,6 +111,7 @@ import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_ELSER_2_INFERENCE_ID; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_RESCORE_OVERSAMPLE; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.INDEX_OPTIONS_FIELD; +import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.UNSUPPORTED_INDEX_MESSAGE; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.generateRandomChunkingSettings; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.generateRandomChunkingSettingsOtherThan; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.randomSemanticText; @@ -1213,6 +1214,59 @@ public void testModelSettingsRequiredWithChunks() throws IOException { assertThat(ex.getMessage(), containsString("[model_settings] must be set for field [field] when chunks are provided")); } + public void testPre811IndexSemanticTextDenseVectorRaisesError() throws IOException { + String fieldName = randomAlphaOfLength(8); + MapperService mapperService = createMapperService( + mapping(b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", "test_model").endObject()), + true, + IndexVersions.V_8_0_0, + IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR) + ); + assertSemanticTextField(mapperService, fieldName, false, null, null); + + MapperParsingException e = expectThrows(MapperParsingException.class, () -> merge( + mapperService, + mapping( + b -> b.startObject(fieldName) + .field("type", "semantic_text") + .field("inference_id", "test_model") + .startObject("model_settings") + .field("task_type", TaskType.TEXT_EMBEDDING.toString()) + .field("dimensions", 256) + .field("similarity", SimilarityMeasure.COSINE.toString()) + .field("element_type", DenseVectorFieldMapper.ElementType.FLOAT) + .endObject() + .endObject() + ) + )); + assertThat(e.getMessage(), equalTo("Failed to parse mapping: " + UNSUPPORTED_INDEX_MESSAGE)); + } + + public void testPre811IndexSemanticTextSparseVectorRaisesError() throws IOException { + String fieldName = randomAlphaOfLength(8); + MapperService mapperService = createMapperService( + mapping(b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", "test_model").endObject()), + true, + IndexVersions.V_8_0_0, + IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR) + ); + assertSemanticTextField(mapperService, fieldName, false, null, null); + + MapperParsingException e = expectThrows(MapperParsingException.class, () -> merge( + mapperService, + mapping( + b -> b.startObject(fieldName) + .field("type", "semantic_text") + .field("inference_id", "test_model") + .startObject("model_settings") + .field("task_type", TaskType.SPARSE_EMBEDDING.toString()) + .endObject() + .endObject() + ) + )); + assertThat(e.getMessage(), equalTo("Failed to parse mapping: " + UNSUPPORTED_INDEX_MESSAGE)); + } + private MapperService mapperServiceForFieldWithModelSettings(String fieldName, String inferenceId, MinimalServiceSettings modelSettings) throws IOException { return mapperServiceForFieldWithModelSettings(fieldName, inferenceId, null, modelSettings); From ae80988720bafac1b7c4e8b2659fb1de66ec7318 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 2 Oct 2025 09:08:57 -0400 Subject: [PATCH 4/9] Spotless --- .../mapper/SemanticTextFieldMapperTests.java | 56 ++++++++++--------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java index fd6834a100184..0a9530be52962 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java @@ -1224,21 +1224,24 @@ public void testPre811IndexSemanticTextDenseVectorRaisesError() throws IOExcepti ); assertSemanticTextField(mapperService, fieldName, false, null, null); - MapperParsingException e = expectThrows(MapperParsingException.class, () -> merge( - mapperService, - mapping( - b -> b.startObject(fieldName) - .field("type", "semantic_text") - .field("inference_id", "test_model") - .startObject("model_settings") - .field("task_type", TaskType.TEXT_EMBEDDING.toString()) - .field("dimensions", 256) - .field("similarity", SimilarityMeasure.COSINE.toString()) - .field("element_type", DenseVectorFieldMapper.ElementType.FLOAT) - .endObject() - .endObject() + MapperParsingException e = expectThrows( + MapperParsingException.class, + () -> merge( + mapperService, + mapping( + b -> b.startObject(fieldName) + .field("type", "semantic_text") + .field("inference_id", "test_model") + .startObject("model_settings") + .field("task_type", TaskType.TEXT_EMBEDDING.toString()) + .field("dimensions", 256) + .field("similarity", SimilarityMeasure.COSINE.toString()) + .field("element_type", DenseVectorFieldMapper.ElementType.FLOAT) + .endObject() + .endObject() + ) ) - )); + ); assertThat(e.getMessage(), equalTo("Failed to parse mapping: " + UNSUPPORTED_INDEX_MESSAGE)); } @@ -1252,18 +1255,21 @@ public void testPre811IndexSemanticTextSparseVectorRaisesError() throws IOExcept ); assertSemanticTextField(mapperService, fieldName, false, null, null); - MapperParsingException e = expectThrows(MapperParsingException.class, () -> merge( - mapperService, - mapping( - b -> b.startObject(fieldName) - .field("type", "semantic_text") - .field("inference_id", "test_model") - .startObject("model_settings") - .field("task_type", TaskType.SPARSE_EMBEDDING.toString()) - .endObject() - .endObject() + MapperParsingException e = expectThrows( + MapperParsingException.class, + () -> merge( + mapperService, + mapping( + b -> b.startObject(fieldName) + .field("type", "semantic_text") + .field("inference_id", "test_model") + .startObject("model_settings") + .field("task_type", TaskType.SPARSE_EMBEDDING.toString()) + .endObject() + .endObject() + ) ) - )); + ); assertThat(e.getMessage(), equalTo("Failed to parse mapping: " + UNSUPPORTED_INDEX_MESSAGE)); } From 776239ee32fa8eb90bf4d8fbc66c6104d803c0b4 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 2 Oct 2025 09:18:31 -0400 Subject: [PATCH 5/9] Update docs/changelog/135845.yaml --- docs/changelog/135845.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/135845.yaml diff --git a/docs/changelog/135845.yaml b/docs/changelog/135845.yaml new file mode 100644 index 0000000000000..f586e4657c372 --- /dev/null +++ b/docs/changelog/135845.yaml @@ -0,0 +1,5 @@ +pr: 135845 +summary: Defer Semantic Text Failures on Pre-8.11 Indices +area: Relevance +type: bug +issues: [] From e78712c50d6ade8a845c634d62ef887c6c7eed11 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 2 Oct 2025 09:23:54 -0400 Subject: [PATCH 6/9] Update changelog --- docs/changelog/135845.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/135845.yaml b/docs/changelog/135845.yaml index f586e4657c372..25d8c41ca6bcb 100644 --- a/docs/changelog/135845.yaml +++ b/docs/changelog/135845.yaml @@ -1,5 +1,5 @@ pr: 135845 summary: Defer Semantic Text Failures on Pre-8.11 Indices -area: Relevance +area: Mapping type: bug issues: [] From 0d375f692a7f45dbadf0f74bf1055c8c4fa3824a Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 2 Oct 2025 12:40:48 -0400 Subject: [PATCH 7/9] Fail to parse broken semantic text fields on first document ingest --- .../mapper/SemanticTextFieldMapper.java | 45 ++++++++++++++----- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index f3453700b6254..1a6c455258f94 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -100,6 +100,7 @@ import java.util.function.Function; import java.util.function.Supplier; +import static org.elasticsearch.index.IndexVersions.NEW_SPARSE_VECTOR; import static org.elasticsearch.index.IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ; import static org.elasticsearch.index.IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ_BACKPORT_8_X; import static org.elasticsearch.inference.TaskType.SPARSE_EMBEDDING; @@ -590,16 +591,33 @@ SemanticTextField parseSemanticTextField(DocumentParserContext context) throws I if (parser.currentToken() == XContentParser.Token.VALUE_NULL) { return null; } + + SemanticTextField semanticTextField; boolean isWithinLeaf = context.path().isWithinLeafObject(); try { context.path().setWithinLeafObject(true); - return SemanticTextField.parse( + semanticTextField = SemanticTextField.parse( context.parser(), new SemanticTextField.ParserContext(fieldType().useLegacyFormat, fullPath(), context.parser().contentType()) ); } finally { context.path().setWithinLeafObject(isWithinLeaf); } + + IndexVersion indexCreatedVersion = context.indexSettings().getIndexVersionCreated(); + if (semanticTextField != null + && semanticTextField.inference().modelSettings() != null + && indexCreatedVersion.before(NEW_SPARSE_VECTOR)) { + // Explicitly fail to parse semantic text fields that meet the following criteria: + // - Are in pre 8.11 indices + // - Have model settings, indicating that they have embeddings to be indexed + // + // We can't fail earlier than this because it causes pre 8.11 indices with semantic text fields to either be in red state or + // cause Elasticsearch to not launch. + throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE); + } + + return semanticTextField; } void parseCreateFieldFromContext(DocumentParserContext context, SemanticTextField field, XContentLocation xContentLocation) @@ -1250,10 +1268,6 @@ private static Mapper.Builder createEmbeddingsField( SemanticTextIndexOptions indexOptions, boolean useLegacyFormat ) { - if (indexVersionCreated.before(IndexVersions.NEW_SPARSE_VECTOR)) { - throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE); - } - return switch (modelSettings.taskType()) { case SPARSE_EMBEDDING -> { SparseVectorFieldMapper.Builder sparseVectorMapperBuilder = new SparseVectorFieldMapper.Builder( @@ -1307,13 +1321,20 @@ private static void configureDenseVectorMapperBuilder( MinimalServiceSettings modelSettings, SemanticTextIndexOptions indexOptions ) { - SimilarityMeasure similarity = modelSettings.similarity(); - if (similarity != null) { - switch (similarity) { - case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE); - case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT); - case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM); - default -> throw new IllegalArgumentException("Unknown similarity measure in model_settings [" + similarity.name() + "]"); + // Skip setting similarity on pre 8.11 indices. It causes dense vector field creation to fail because similarity can only be set + // on indexed fields, which is not done by default prior to 8.11. The fact that the dense vector field is partially configured is + // moot because we will explicitly fail to index docs into this semantic text field anyways. + if (indexVersionCreated.onOrAfter(NEW_SPARSE_VECTOR)) { + SimilarityMeasure similarity = modelSettings.similarity(); + if (similarity != null) { + switch (similarity) { + case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE); + case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT); + case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM); + default -> throw new IllegalArgumentException( + "Unknown similarity measure in model_settings [" + similarity.name() + "]" + ); + } } } From 1da8e64fe49cd54de9a6e0eff5df26a1f399bfd5 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 2 Oct 2025 13:14:59 -0400 Subject: [PATCH 8/9] Updated tests --- .../mapper/SemanticTextFieldMapperTests.java | 96 +++++++++++++------ 1 file changed, 65 insertions(+), 31 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java index 0a9530be52962..98a5d93b1c85f 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java @@ -1215,62 +1215,96 @@ public void testModelSettingsRequiredWithChunks() throws IOException { } public void testPre811IndexSemanticTextDenseVectorRaisesError() throws IOException { + Model model = TestModel.createRandomInstance(TaskType.TEXT_EMBEDDING); String fieldName = randomAlphaOfLength(8); + MapperService mapperService = createMapperService( - mapping(b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", "test_model").endObject()), + mapping( + b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", model.getInferenceEntityId()).endObject() + ), true, IndexVersions.V_8_0_0, IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR) ); assertSemanticTextField(mapperService, fieldName, false, null, null); - MapperParsingException e = expectThrows( - MapperParsingException.class, - () -> merge( - mapperService, - mapping( - b -> b.startObject(fieldName) - .field("type", "semantic_text") - .field("inference_id", "test_model") - .startObject("model_settings") - .field("task_type", TaskType.TEXT_EMBEDDING.toString()) - .field("dimensions", 256) - .field("similarity", SimilarityMeasure.COSINE.toString()) - .field("element_type", DenseVectorFieldMapper.ElementType.FLOAT) - .endObject() - .endObject() + merge( + mapperService, + mapping( + b -> b.startObject(fieldName) + .field("type", "semantic_text") + .field("inference_id", model.getInferenceEntityId()) + .startObject("model_settings") + .field("task_type", TaskType.TEXT_EMBEDDING.toString()) + .field("dimensions", model.getServiceSettings().dimensions()) + .field("similarity", model.getServiceSettings().similarity()) + .field("element_type", model.getServiceSettings().elementType()) + .endObject() + .endObject() + ) + ); + assertSemanticTextField(mapperService, fieldName, true, null, null); + + DocumentMapper documentMapper = mapperService.documentMapper(); + DocumentParsingException e = assertThrows( + DocumentParsingException.class, + () -> documentMapper.parse( + source( + b -> addSemanticTextInferenceResults( + true, + b, + List.of(randomSemanticText(true, fieldName, model, null, List.of("foo", "bar"), XContentType.JSON)) + ) ) ) ); - assertThat(e.getMessage(), equalTo("Failed to parse mapping: " + UNSUPPORTED_INDEX_MESSAGE)); + assertThat(e.getCause(), instanceOf(UnsupportedOperationException.class)); + assertThat(e.getCause().getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE)); } public void testPre811IndexSemanticTextSparseVectorRaisesError() throws IOException { + Model model = TestModel.createRandomInstance(TaskType.SPARSE_EMBEDDING); String fieldName = randomAlphaOfLength(8); + MapperService mapperService = createMapperService( - mapping(b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", "test_model").endObject()), + mapping( + b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", model.getInferenceEntityId()).endObject() + ), true, IndexVersions.V_8_0_0, IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR) ); assertSemanticTextField(mapperService, fieldName, false, null, null); - MapperParsingException e = expectThrows( - MapperParsingException.class, - () -> merge( - mapperService, - mapping( - b -> b.startObject(fieldName) - .field("type", "semantic_text") - .field("inference_id", "test_model") - .startObject("model_settings") - .field("task_type", TaskType.SPARSE_EMBEDDING.toString()) - .endObject() - .endObject() + merge( + mapperService, + mapping( + b -> b.startObject(fieldName) + .field("type", "semantic_text") + .field("inference_id", model.getInferenceEntityId()) + .startObject("model_settings") + .field("task_type", TaskType.SPARSE_EMBEDDING.toString()) + .endObject() + .endObject() + ) + ); + assertSemanticTextField(mapperService, fieldName, true, null, null); + + DocumentMapper documentMapper = mapperService.documentMapper(); + DocumentParsingException e = assertThrows( + DocumentParsingException.class, + () -> documentMapper.parse( + source( + b -> addSemanticTextInferenceResults( + true, + b, + List.of(randomSemanticText(true, fieldName, model, null, List.of("foo", "bar"), XContentType.JSON)) + ) ) ) ); - assertThat(e.getMessage(), equalTo("Failed to parse mapping: " + UNSUPPORTED_INDEX_MESSAGE)); + assertThat(e.getCause(), instanceOf(UnsupportedOperationException.class)); + assertThat(e.getCause().getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE)); } private MapperService mapperServiceForFieldWithModelSettings(String fieldName, String inferenceId, MinimalServiceSettings modelSettings) From 0fc7daa970b4c8ed54a428b4ff0899519ca71d08 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 2 Oct 2025 13:29:09 -0400 Subject: [PATCH 9/9] Updated changelog --- docs/changelog/135845.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/135845.yaml b/docs/changelog/135845.yaml index 25d8c41ca6bcb..032a01a5b3542 100644 --- a/docs/changelog/135845.yaml +++ b/docs/changelog/135845.yaml @@ -1,5 +1,5 @@ pr: 135845 -summary: Defer Semantic Text Failures on Pre-8.11 Indices +summary: Fix for creating semantic_text fields on pre-8.11 indices crashing Elasticsearch area: Mapping type: bug issues: []