Skip to content

Commit 936f7cf

Browse files
Mikep86 and elasticsearchmachine authored
[9.0] Defer Semantic Text Failures on Pre-8.11 Indices (#135845) (#135872)
* Defer Semantic Text Failures on Pre-8.11 Indices (#135845)

  (cherry picked from commit 66d9241)

  # Conflicts:
  #   server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java
  #   x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

* Update comment
* [CI] Auto commit changes from spotless
* Fix compile errors

---------

Co-authored-by: elasticsearchmachine <[email protected]>
1 parent b316a76 commit 936f7cf

File tree

3 files changed

+138
-65
lines changed

3 files changed

+138
-65
lines changed

docs/changelog/135845.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 135845
2+
summary: Fix for creating semantic_text fields on pre-8.11 indices crashing Elasticsearch
3+
area: Mapping
4+
type: bug
5+
issues: []

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@
114114
*/
115115
public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper {
116116
private static final Logger logger = LogManager.getLogger(SemanticTextFieldMapper.class);
117-
public static final String UNSUPPORTED_INDEX_MESSAGE = "[semantic_text] is available on indices created with 8.11 or higher.";
117+
118118
public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");
119119
public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix");
120120
public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix");
@@ -127,16 +127,19 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
127127
public static final String CONTENT_TYPE = "semantic_text";
128128
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;
129129

130+
public static final String UNSUPPORTED_INDEX_MESSAGE = "["
131+
+ CONTENT_TYPE
132+
+ "] is available on indices created with 8.11 or higher. Please create a new index to use ["
133+
+ CONTENT_TYPE
134+
+ "]";
135+
130136
public static final TypeParser PARSER = new TypeParser(
131137
(n, c) -> new Builder(n, c::bitSetProducer, c.getIndexSettings()),
132138
List.of(validateParserContext(CONTENT_TYPE))
133139
);
134140

135141
public static BiConsumer<String, MappingParserContext> validateParserContext(String type) {
136142
return (n, c) -> {
137-
if (c.getIndexSettings().getIndexVersionCreated().before(NEW_SPARSE_VECTOR)) {
138-
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
139-
}
140143
if (InferenceMetadataFieldsMapper.isEnabled(c.getIndexSettings().getSettings()) == false) {
141144
notInMultiFields(type).accept(n, c);
142145
}
@@ -380,16 +383,33 @@ SemanticTextField parseSemanticTextField(DocumentParserContext context) throws I
380383
if (parser.currentToken() == XContentParser.Token.VALUE_NULL) {
381384
return null;
382385
}
386+
387+
SemanticTextField semanticTextField;
383388
boolean isWithinLeaf = context.path().isWithinLeafObject();
384389
try {
385390
context.path().setWithinLeafObject(true);
386-
return SemanticTextField.parse(
391+
semanticTextField = SemanticTextField.parse(
387392
context.parser(),
388393
new SemanticTextField.ParserContext(fieldType().useLegacyFormat, fullPath(), context.parser().contentType())
389394
);
390395
} finally {
391396
context.path().setWithinLeafObject(isWithinLeaf);
392397
}
398+
399+
IndexVersion indexCreatedVersion = context.indexSettings().getIndexVersionCreated();
400+
if (semanticTextField != null
401+
&& semanticTextField.inference().modelSettings() != null
402+
&& indexCreatedVersion.before(NEW_SPARSE_VECTOR)) {
403+
// Explicitly fail to parse semantic text fields that meet the following criteria:
404+
// - Are in pre 8.11 indices
405+
// - Have model settings, indicating that they have embeddings to be indexed
406+
//
407+
// We can't fail earlier than this because it causes pre 8.11 indices with semantic text fields to either be in red state or
408+
// cause Elasticsearch to not launch.
409+
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
410+
}
411+
412+
return semanticTextField;
393413
}
394414

395415
void parseCreateFieldFromContext(DocumentParserContext context, SemanticTextField field, XContentLocation xContentLocation)
@@ -965,17 +985,23 @@ private static Mapper.Builder createEmbeddingsField(
965985
indexVersionCreated
966986
);
967987

968-
SimilarityMeasure similarity = modelSettings.similarity();
969-
if (similarity != null) {
970-
switch (similarity) {
971-
case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE);
972-
case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT);
973-
case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM);
974-
default -> throw new IllegalArgumentException(
975-
"Unknown similarity measure in model_settings [" + similarity.name() + "]"
976-
);
988+
// Skip setting similarity on pre 8.11 indices. It causes dense vector field creation to fail because similarity can only be
989+
// set on indexed fields, which is not done by default prior to 8.11. The fact that the dense vector field is partially
990+
// configured is moot because we will explicitly fail to index docs into this semantic text field anyways.
991+
if (indexVersionCreated.onOrAfter(NEW_SPARSE_VECTOR)) {
992+
SimilarityMeasure similarity = modelSettings.similarity();
993+
if (similarity != null) {
994+
switch (similarity) {
995+
case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE);
996+
case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT);
997+
case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM);
998+
default -> throw new IllegalArgumentException(
999+
"Unknown similarity measure in model_settings [" + similarity.name() + "]"
1000+
);
1001+
}
9771002
}
9781003
}
1004+
9791005
denseVectorMapperBuilder.dimensions(modelSettings.dimensions());
9801006
denseVectorMapperBuilder.elementType(modelSettings.elementType());
9811007

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java

Lines changed: 93 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -340,57 +340,6 @@ public void testInvalidTaskTypes() {
340340
}
341341
}
342342

343-
@Override
344-
protected IndexVersion boostNotAllowedIndexVersion() {
345-
return IndexVersions.NEW_SPARSE_VECTOR;
346-
}
347-
348-
public void testOldIndexSemanticTextDenseVectorRaisesError() throws IOException {
349-
final String fieldName = "field";
350-
final XContentBuilder fieldMapping = fieldMapping(b -> {
351-
b.field("type", "semantic_text");
352-
b.field(INFERENCE_ID_FIELD, "test_inference_id");
353-
b.startObject("model_settings");
354-
b.field("task_type", "text_embedding");
355-
b.field("dimensions", 384);
356-
b.field("similarity", "cosine");
357-
b.field("element_type", "float");
358-
b.endObject();
359-
});
360-
assertOldIndexUnsupported(fieldMapping);
361-
}
362-
363-
public void testOldIndexSemanticTextMinimalMappingRaisesError() throws IOException {
364-
final XContentBuilder fieldMapping = fieldMapping(this::minimalMapping);
365-
assertOldIndexUnsupported(fieldMapping);
366-
}
367-
368-
public void testOldIndexSemanticTextSparseVersionRaisesError() throws IOException {
369-
final XContentBuilder fieldMapping = fieldMapping(b -> {
370-
b.field("type", "semantic_text");
371-
b.field("inference_id", "another_inference_id");
372-
b.startObject("model_settings");
373-
b.field("task_type", "sparse_embedding");
374-
b.endObject();
375-
});
376-
assertOldIndexUnsupported(fieldMapping);
377-
}
378-
379-
private void assertOldIndexUnsupported(XContentBuilder fieldMapping) {
380-
381-
MapperParsingException exception = assertThrows(
382-
MapperParsingException.class,
383-
() -> createMapperService(
384-
fieldMapping,
385-
true,
386-
IndexVersions.V_8_0_0,
387-
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
388-
)
389-
);
390-
assertTrue(exception.getMessage().contains(UNSUPPORTED_INDEX_MESSAGE));
391-
assertTrue(exception.getRootCause() instanceof UnsupportedOperationException);
392-
}
393-
394343
public void testMultiFieldsSupport() throws IOException {
395344
if (useLegacyFormat) {
396345
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
@@ -944,6 +893,99 @@ public void testModelSettingsRequiredWithChunks() throws IOException {
944893
assertThat(ex.getMessage(), containsString("[model_settings] must be set for field [field] when chunks are provided"));
945894
}
946895

896+
public void testPre811IndexSemanticTextDenseVectorRaisesError() throws IOException {
897+
Model model = TestModel.createRandomInstance(TaskType.TEXT_EMBEDDING);
898+
String fieldName = randomAlphaOfLength(8);
899+
900+
MapperService mapperService = createMapperService(
901+
mapping(
902+
b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", model.getInferenceEntityId()).endObject()
903+
),
904+
true,
905+
IndexVersions.V_8_0_0,
906+
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
907+
);
908+
assertSemanticTextField(mapperService, fieldName, false);
909+
910+
merge(
911+
mapperService,
912+
mapping(
913+
b -> b.startObject(fieldName)
914+
.field("type", "semantic_text")
915+
.field("inference_id", model.getInferenceEntityId())
916+
.startObject("model_settings")
917+
.field("task_type", TaskType.TEXT_EMBEDDING.toString())
918+
.field("dimensions", model.getServiceSettings().dimensions())
919+
.field("similarity", model.getServiceSettings().similarity())
920+
.field("element_type", model.getServiceSettings().elementType())
921+
.endObject()
922+
.endObject()
923+
)
924+
);
925+
assertSemanticTextField(mapperService, fieldName, true);
926+
927+
DocumentMapper documentMapper = mapperService.documentMapper();
928+
DocumentParsingException e = assertThrows(
929+
DocumentParsingException.class,
930+
() -> documentMapper.parse(
931+
source(
932+
b -> addSemanticTextInferenceResults(
933+
true,
934+
b,
935+
List.of(randomSemanticText(true, fieldName, model, List.of("foo", "bar"), XContentType.JSON))
936+
)
937+
)
938+
)
939+
);
940+
assertThat(e.getCause(), instanceOf(UnsupportedOperationException.class));
941+
assertThat(e.getCause().getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE));
942+
}
943+
944+
public void testPre811IndexSemanticTextSparseVectorRaisesError() throws IOException {
945+
Model model = TestModel.createRandomInstance(TaskType.SPARSE_EMBEDDING);
946+
String fieldName = randomAlphaOfLength(8);
947+
948+
MapperService mapperService = createMapperService(
949+
mapping(
950+
b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", model.getInferenceEntityId()).endObject()
951+
),
952+
true,
953+
IndexVersions.V_8_0_0,
954+
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
955+
);
956+
assertSemanticTextField(mapperService, fieldName, false);
957+
958+
merge(
959+
mapperService,
960+
mapping(
961+
b -> b.startObject(fieldName)
962+
.field("type", "semantic_text")
963+
.field("inference_id", model.getInferenceEntityId())
964+
.startObject("model_settings")
965+
.field("task_type", TaskType.SPARSE_EMBEDDING.toString())
966+
.endObject()
967+
.endObject()
968+
)
969+
);
970+
assertSemanticTextField(mapperService, fieldName, true);
971+
972+
DocumentMapper documentMapper = mapperService.documentMapper();
973+
DocumentParsingException e = assertThrows(
974+
DocumentParsingException.class,
975+
() -> documentMapper.parse(
976+
source(
977+
b -> addSemanticTextInferenceResults(
978+
true,
979+
b,
980+
List.of(randomSemanticText(true, fieldName, model, List.of("foo", "bar"), XContentType.JSON))
981+
)
982+
)
983+
)
984+
);
985+
assertThat(e.getCause(), instanceOf(UnsupportedOperationException.class));
986+
assertThat(e.getCause().getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE));
987+
}
988+
947989
private MapperService mapperServiceForFieldWithModelSettings(String fieldName, String inferenceId, MinimalServiceSettings modelSettings)
948990
throws IOException {
949991
return mapperServiceForFieldWithModelSettings(fieldName, inferenceId, null, modelSettings);

0 commit comments

Comments
 (0)