Skip to content

Commit 41ffda5

Browse files
committed
Defer Semantic Text Failures on Pre-8.11 Indices (elastic#135845)
(cherry picked from commit 66d9241)

# Conflicts:
#   server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java
#   x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
1 parent e683fb0 commit 41ffda5

File tree

4 files changed: 138 additions and 67 deletions.

docs/changelog/135845.yaml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
pr: 135845
2+
summary: Fix for creating semantic_text fields on pre-8.11 indices crashing Elasticsearch
3+
area: Mapping
4+
type: bug
5+
issues: []

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,6 @@
6363

6464
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
6565
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
66-
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION;
6766
import static org.hamcrest.Matchers.containsString;
6867
import static org.hamcrest.Matchers.equalTo;
6968
import static org.hamcrest.Matchers.instanceOf;
@@ -100,7 +99,7 @@ private void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws I
10099
if (elementType != ElementType.FLOAT) {
101100
b.field("element_type", elementType.toString());
102101
}
103-
if (indexVersion.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) {
102+
if (indexVersion.onOrAfter(DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) {
104103
// Serialize if it's new index version, or it was not the default for previous indices
105104
b.field("index", indexed);
106105
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 39 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -113,7 +113,6 @@
113113
public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper {
114114
public static final NodeFeature SEMANTIC_TEXT_SEARCH_INFERENCE_ID = new NodeFeature("semantic_text.search_inference_id", true);
115115
public static final NodeFeature SEMANTIC_TEXT_DEFAULT_ELSER_2 = new NodeFeature("semantic_text.default_elser_2", true);
116-
public static final String UNSUPPORTED_INDEX_MESSAGE = "[semantic_text] is available on indices created with 8.11 or higher.";
117116
public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");
118117
public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix");
119118
public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix");
@@ -126,16 +125,19 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
126125
public static final String CONTENT_TYPE = "semantic_text";
127126
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;
128127

128+
public static final String UNSUPPORTED_INDEX_MESSAGE = "["
129+
+ CONTENT_TYPE
130+
+ "] is available on indices created with 8.11 or higher. Please create a new index to use ["
131+
+ CONTENT_TYPE
132+
+ "]";
133+
129134
public static final TypeParser PARSER = new TypeParser(
130135
(n, c) -> new Builder(n, c::bitSetProducer, c.getIndexSettings()),
131136
List.of(validateParserContext(CONTENT_TYPE))
132137
);
133138

134139
public static BiConsumer<String, MappingParserContext> validateParserContext(String type) {
135140
return (n, c) -> {
136-
if (c.getIndexSettings().getIndexVersionCreated().before(NEW_SPARSE_VECTOR)) {
137-
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
138-
}
139141
if (InferenceMetadataFieldsMapper.isEnabled(c.getIndexSettings().getSettings()) == false) {
140142
notInMultiFields(type).accept(n, c);
141143
}
@@ -379,16 +381,33 @@ SemanticTextField parseSemanticTextField(DocumentParserContext context) throws I
379381
if (parser.currentToken() == XContentParser.Token.VALUE_NULL) {
380382
return null;
381383
}
384+
385+
SemanticTextField semanticTextField;
382386
boolean isWithinLeaf = context.path().isWithinLeafObject();
383387
try {
384388
context.path().setWithinLeafObject(true);
385-
return SemanticTextField.parse(
389+
semanticTextField = SemanticTextField.parse(
386390
context.parser(),
387391
new SemanticTextField.ParserContext(fieldType().useLegacyFormat, fullPath(), context.parser().contentType())
388392
);
389393
} finally {
390394
context.path().setWithinLeafObject(isWithinLeaf);
391395
}
396+
397+
IndexVersion indexCreatedVersion = context.indexSettings().getIndexVersionCreated();
398+
if (semanticTextField != null
399+
&& semanticTextField.inference().modelSettings() != null
400+
&& indexCreatedVersion.before(NEW_SPARSE_VECTOR)) {
401+
// Explicitly fail to parse semantic text fields that meet the following criteria:
402+
// - Are in pre 8.11 indices
403+
// - Have model settings, indicating that they have embeddings to be indexed
404+
//
405+
// We can't fail earlier than this because it causes pre 8.11 indices with semantic text fields to either be in red state or
406+
// cause Elasticsearch to not launch.
407+
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
408+
}
409+
410+
return semanticTextField;
392411
}
393412

394413
void parseCreateFieldFromContext(DocumentParserContext context, SemanticTextField field, XContentLocation xContentLocation)
@@ -964,17 +983,23 @@ private static Mapper.Builder createEmbeddingsField(
964983
indexVersionCreated
965984
);
966985

967-
SimilarityMeasure similarity = modelSettings.similarity();
968-
if (similarity != null) {
969-
switch (similarity) {
970-
case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE);
971-
case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT);
972-
case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM);
973-
default -> throw new IllegalArgumentException(
974-
"Unknown similarity measure in model_settings [" + similarity.name() + "]"
975-
);
986+
// Skip setting similarity on pre 8.11 indices. It causes dense vector field creation to fail because similarity can only be set
987+
// on indexed fields, which is not done by default prior to 8.11. The fact that the dense vector field is partially configured is
988+
// moot because we will explicitly fail to index docs into this semantic text field anyways.
989+
if (indexVersionCreated.onOrAfter(NEW_SPARSE_VECTOR)) {
990+
SimilarityMeasure similarity = modelSettings.similarity();
991+
if (similarity != null) {
992+
switch (similarity) {
993+
case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE);
994+
case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT);
995+
case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM);
996+
default -> throw new IllegalArgumentException(
997+
"Unknown similarity measure in model_settings [" + similarity.name() + "]"
998+
);
999+
}
9761000
}
9771001
}
1002+
9781003
denseVectorMapperBuilder.dimensions(modelSettings.dimensions());
9791004
denseVectorMapperBuilder.elementType(modelSettings.elementType());
9801005

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java

Lines changed: 93 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -339,57 +339,6 @@ public void testInvalidTaskTypes() {
339339
}
340340
}
341341

342-
@Override
343-
protected IndexVersion boostNotAllowedIndexVersion() {
344-
return IndexVersions.NEW_SPARSE_VECTOR;
345-
}
346-
347-
public void testOldIndexSemanticTextDenseVectorRaisesError() throws IOException {
348-
final String fieldName = "field";
349-
final XContentBuilder fieldMapping = fieldMapping(b -> {
350-
b.field("type", "semantic_text");
351-
b.field(INFERENCE_ID_FIELD, "test_inference_id");
352-
b.startObject("model_settings");
353-
b.field("task_type", "text_embedding");
354-
b.field("dimensions", 384);
355-
b.field("similarity", "cosine");
356-
b.field("element_type", "float");
357-
b.endObject();
358-
});
359-
assertOldIndexUnsupported(fieldMapping);
360-
}
361-
362-
public void testOldIndexSemanticTextMinimalMappingRaisesError() throws IOException {
363-
final XContentBuilder fieldMapping = fieldMapping(this::minimalMapping);
364-
assertOldIndexUnsupported(fieldMapping);
365-
}
366-
367-
public void testOldIndexSemanticTextSparseVersionRaisesError() throws IOException {
368-
final XContentBuilder fieldMapping = fieldMapping(b -> {
369-
b.field("type", "semantic_text");
370-
b.field("inference_id", "another_inference_id");
371-
b.startObject("model_settings");
372-
b.field("task_type", "sparse_embedding");
373-
b.endObject();
374-
});
375-
assertOldIndexUnsupported(fieldMapping);
376-
}
377-
378-
private void assertOldIndexUnsupported(XContentBuilder fieldMapping) {
379-
380-
MapperParsingException exception = assertThrows(
381-
MapperParsingException.class,
382-
() -> createMapperService(
383-
fieldMapping,
384-
true,
385-
IndexVersions.V_8_0_0,
386-
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
387-
)
388-
);
389-
assertTrue(exception.getMessage().contains(UNSUPPORTED_INDEX_MESSAGE));
390-
assertTrue(exception.getRootCause() instanceof UnsupportedOperationException);
391-
}
392-
393342
public void testMultiFieldsSupport() throws IOException {
394343
if (useLegacyFormat) {
395344
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
@@ -943,6 +892,99 @@ public void testModelSettingsRequiredWithChunks() throws IOException {
943892
assertThat(ex.getMessage(), containsString("[model_settings] must be set for field [field] when chunks are provided"));
944893
}
945894

895+
public void testPre811IndexSemanticTextDenseVectorRaisesError() throws IOException {
896+
Model model = TestModel.createRandomInstance(TaskType.TEXT_EMBEDDING);
897+
String fieldName = randomAlphaOfLength(8);
898+
899+
MapperService mapperService = createMapperService(
900+
mapping(
901+
b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", model.getInferenceEntityId()).endObject()
902+
),
903+
true,
904+
IndexVersions.V_8_0_0,
905+
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
906+
);
907+
assertSemanticTextField(mapperService, fieldName, false, null, null);
908+
909+
merge(
910+
mapperService,
911+
mapping(
912+
b -> b.startObject(fieldName)
913+
.field("type", "semantic_text")
914+
.field("inference_id", model.getInferenceEntityId())
915+
.startObject("model_settings")
916+
.field("task_type", TaskType.TEXT_EMBEDDING.toString())
917+
.field("dimensions", model.getServiceSettings().dimensions())
918+
.field("similarity", model.getServiceSettings().similarity())
919+
.field("element_type", model.getServiceSettings().elementType())
920+
.endObject()
921+
.endObject()
922+
)
923+
);
924+
assertSemanticTextField(mapperService, fieldName, true, null, null);
925+
926+
DocumentMapper documentMapper = mapperService.documentMapper();
927+
DocumentParsingException e = assertThrows(
928+
DocumentParsingException.class,
929+
() -> documentMapper.parse(
930+
source(
931+
b -> addSemanticTextInferenceResults(
932+
true,
933+
b,
934+
List.of(randomSemanticText(true, fieldName, model, null, List.of("foo", "bar"), XContentType.JSON))
935+
)
936+
)
937+
)
938+
);
939+
assertThat(e.getCause(), instanceOf(UnsupportedOperationException.class));
940+
assertThat(e.getCause().getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE));
941+
}
942+
943+
public void testPre811IndexSemanticTextSparseVectorRaisesError() throws IOException {
944+
Model model = TestModel.createRandomInstance(TaskType.SPARSE_EMBEDDING);
945+
String fieldName = randomAlphaOfLength(8);
946+
947+
MapperService mapperService = createMapperService(
948+
mapping(
949+
b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", model.getInferenceEntityId()).endObject()
950+
),
951+
true,
952+
IndexVersions.V_8_0_0,
953+
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
954+
);
955+
assertSemanticTextField(mapperService, fieldName, false, null, null);
956+
957+
merge(
958+
mapperService,
959+
mapping(
960+
b -> b.startObject(fieldName)
961+
.field("type", "semantic_text")
962+
.field("inference_id", model.getInferenceEntityId())
963+
.startObject("model_settings")
964+
.field("task_type", TaskType.SPARSE_EMBEDDING.toString())
965+
.endObject()
966+
.endObject()
967+
)
968+
);
969+
assertSemanticTextField(mapperService, fieldName, true, null, null);
970+
971+
DocumentMapper documentMapper = mapperService.documentMapper();
972+
DocumentParsingException e = assertThrows(
973+
DocumentParsingException.class,
974+
() -> documentMapper.parse(
975+
source(
976+
b -> addSemanticTextInferenceResults(
977+
true,
978+
b,
979+
List.of(randomSemanticText(true, fieldName, model, null, List.of("foo", "bar"), XContentType.JSON))
980+
)
981+
)
982+
)
983+
);
984+
assertThat(e.getCause(), instanceOf(UnsupportedOperationException.class));
985+
assertThat(e.getCause().getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE));
986+
}
987+
946988
private MapperService mapperServiceForFieldWithModelSettings(String fieldName, String inferenceId, MinimalServiceSettings modelSettings)
947989
throws IOException {
948990
return mapperServiceForFieldWithModelSettings(fieldName, inferenceId, null, modelSettings);

0 commit comments

Comments
 (0)