Skip to content

Commit 027a43c

Browse files
committed
Defer Semantic Text Failures on Pre-8.11 Indices (elastic#135845)
(cherry picked from commit 66d9241)

# Conflicts:
#   server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java
#   x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
1 parent f2ca372 commit 027a43c

File tree

4 files changed

+139
-67
lines changed

4 files changed

+139
-67
lines changed

docs/changelog/135845.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 135845
2+
summary: Fix for creating semantic_text fields on pre-8.11 indices crashing Elasticsearch
3+
area: Mapping
4+
type: bug
5+
issues: []

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@
6464

6565
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
6666
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
67-
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION;
6867
import static org.hamcrest.Matchers.containsString;
6968
import static org.hamcrest.Matchers.equalTo;
7069
import static org.hamcrest.Matchers.instanceOf;
@@ -101,7 +100,7 @@ private void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws I
101100
if (elementType != ElementType.FLOAT) {
102101
b.field("element_type", elementType.toString());
103102
}
104-
if (indexVersion.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) {
103+
if (indexVersion.onOrAfter(DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) {
105104
// Serialize if it's a new index version, or if it was not the default for previous indices
106105
b.field("index", indexed);
107106
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@
123123
*/
124124
public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper {
125125
private static final Logger logger = LogManager.getLogger(SemanticTextFieldMapper.class);
126-
public static final String UNSUPPORTED_INDEX_MESSAGE = "[semantic_text] is available on indices created with 8.11 or higher.";
126+
127127
public static final NodeFeature SEMANTIC_TEXT_SEARCH_INFERENCE_ID = new NodeFeature("semantic_text.search_inference_id", true);
128128
public static final NodeFeature SEMANTIC_TEXT_DEFAULT_ELSER_2 = new NodeFeature("semantic_text.default_elser_2", true);
129129
public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");
@@ -145,6 +145,12 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
145145
public static final String CONTENT_TYPE = "semantic_text";
146146
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;
147147

148+
public static final String UNSUPPORTED_INDEX_MESSAGE = "["
149+
+ CONTENT_TYPE
150+
+ "] is available on indices created with 8.11 or higher. Please create a new index to use ["
151+
+ CONTENT_TYPE
152+
+ "]";
153+
148154
public static final float DEFAULT_RESCORE_OVERSAMPLE = 3.0f;
149155

150156
static final String INDEX_OPTIONS_FIELD = "index_options";
@@ -158,9 +164,6 @@ public static final TypeParser parser(Supplier<ModelRegistry> modelRegistry) {
158164

159165
public static BiConsumer<String, MappingParserContext> validateParserContext(String type) {
160166
return (n, c) -> {
161-
if (c.getIndexSettings().getIndexVersionCreated().before(NEW_SPARSE_VECTOR)) {
162-
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
163-
}
164167
if (InferenceMetadataFieldsMapper.isEnabled(c.getIndexSettings().getSettings()) == false) {
165168
notInMultiFields(type).accept(n, c);
166169
}
@@ -548,16 +551,33 @@ SemanticTextField parseSemanticTextField(DocumentParserContext context) throws I
548551
if (parser.currentToken() == XContentParser.Token.VALUE_NULL) {
549552
return null;
550553
}
554+
555+
SemanticTextField semanticTextField;
551556
boolean isWithinLeaf = context.path().isWithinLeafObject();
552557
try {
553558
context.path().setWithinLeafObject(true);
554-
return SemanticTextField.parse(
559+
semanticTextField = SemanticTextField.parse(
555560
context.parser(),
556561
new SemanticTextField.ParserContext(fieldType().useLegacyFormat, fullPath(), context.parser().contentType())
557562
);
558563
} finally {
559564
context.path().setWithinLeafObject(isWithinLeaf);
560565
}
566+
567+
IndexVersion indexCreatedVersion = context.indexSettings().getIndexVersionCreated();
568+
if (semanticTextField != null
569+
&& semanticTextField.inference().modelSettings() != null
570+
&& indexCreatedVersion.before(NEW_SPARSE_VECTOR)) {
571+
// Explicitly fail to parse semantic text fields that meet the following criteria:
572+
// - Are in pre-8.11 indices
573+
// - Have model settings, indicating that they have embeddings to be indexed
574+
//
575+
// We can't fail earlier than this because doing so causes pre-8.11 indices with semantic text fields to either go into a red
576+
// state or prevent Elasticsearch from launching.
577+
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
578+
}
579+
580+
return semanticTextField;
561581
}
562582

563583
void parseCreateFieldFromContext(DocumentParserContext context, SemanticTextField field, XContentLocation xContentLocation)
@@ -1158,17 +1178,23 @@ private static Mapper.Builder createEmbeddingsField(
11581178
indexVersionCreated
11591179
);
11601180

1161-
SimilarityMeasure similarity = modelSettings.similarity();
1162-
if (similarity != null) {
1163-
switch (similarity) {
1164-
case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE);
1165-
case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT);
1166-
case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM);
1167-
default -> throw new IllegalArgumentException(
1168-
"Unknown similarity measure in model_settings [" + similarity.name() + "]"
1169-
);
1181+
// Skip setting similarity on pre-8.11 indices. Setting it causes dense vector field creation to fail because similarity can only be
1182+
// set on indexed fields, and dense vector fields are not indexed by default prior to 8.11. The fact that the dense vector field is
1183+
// partially configured is moot because we will explicitly fail to index docs into this semantic text field anyway.
1184+
if (indexVersionCreated.onOrAfter(NEW_SPARSE_VECTOR)) {
1185+
SimilarityMeasure similarity = modelSettings.similarity();
1186+
if (similarity != null) {
1187+
switch (similarity) {
1188+
case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE);
1189+
case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT);
1190+
case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM);
1191+
default -> throw new IllegalArgumentException(
1192+
"Unknown similarity measure in model_settings [" + similarity.name() + "]"
1193+
);
1194+
}
11701195
}
11711196
}
1197+
11721198
denseVectorMapperBuilder.dimensions(modelSettings.dimensions());
11731199
denseVectorMapperBuilder.elementType(modelSettings.elementType());
11741200
if (indexOptions != null) {

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java

Lines changed: 93 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -402,57 +402,6 @@ public void testInvalidTaskTypes() {
402402
}
403403
}
404404

405-
@Override
406-
protected IndexVersion boostNotAllowedIndexVersion() {
407-
return IndexVersions.NEW_SPARSE_VECTOR;
408-
}
409-
410-
public void testOldIndexSemanticTextDenseVectorRaisesError() throws IOException {
411-
final String fieldName = "field";
412-
final XContentBuilder fieldMapping = fieldMapping(b -> {
413-
b.field("type", "semantic_text");
414-
b.field(INFERENCE_ID_FIELD, "test_inference_id");
415-
b.startObject("model_settings");
416-
b.field("task_type", "text_embedding");
417-
b.field("dimensions", 384);
418-
b.field("similarity", "cosine");
419-
b.field("element_type", "float");
420-
b.endObject();
421-
});
422-
assertOldIndexUnsupported(fieldMapping);
423-
}
424-
425-
public void testOldIndexSemanticTextMinimalMappingRaisesError() throws IOException {
426-
final XContentBuilder fieldMapping = fieldMapping(this::minimalMapping);
427-
assertOldIndexUnsupported(fieldMapping);
428-
}
429-
430-
public void testOldIndexSemanticTextSparseVersionRaisesError() throws IOException {
431-
final XContentBuilder fieldMapping = fieldMapping(b -> {
432-
b.field("type", "semantic_text");
433-
b.field("inference_id", "another_inference_id");
434-
b.startObject("model_settings");
435-
b.field("task_type", "sparse_embedding");
436-
b.endObject();
437-
});
438-
assertOldIndexUnsupported(fieldMapping);
439-
}
440-
441-
private void assertOldIndexUnsupported(XContentBuilder fieldMapping) {
442-
443-
MapperParsingException exception = assertThrows(
444-
MapperParsingException.class,
445-
() -> createMapperService(
446-
fieldMapping,
447-
true,
448-
IndexVersions.V_8_0_0,
449-
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
450-
)
451-
);
452-
assertTrue(exception.getMessage().contains(UNSUPPORTED_INDEX_MESSAGE));
453-
assertTrue(exception.getRootCause() instanceof UnsupportedOperationException);
454-
}
455-
456405
public void testMultiFieldsSupport() throws IOException {
457406
if (useLegacyFormat) {
458407
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
@@ -1133,6 +1082,99 @@ public void testModelSettingsRequiredWithChunks() throws IOException {
11331082
assertThat(ex.getMessage(), containsString("[model_settings] must be set for field [field] when chunks are provided"));
11341083
}
11351084

1085+
public void testPre811IndexSemanticTextDenseVectorRaisesError() throws IOException {
1086+
Model model = TestModel.createRandomInstance(TaskType.TEXT_EMBEDDING);
1087+
String fieldName = randomAlphaOfLength(8);
1088+
1089+
MapperService mapperService = createMapperService(
1090+
mapping(
1091+
b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", model.getInferenceEntityId()).endObject()
1092+
),
1093+
true,
1094+
IndexVersions.V_8_0_0,
1095+
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
1096+
);
1097+
assertSemanticTextField(mapperService, fieldName, false, null, null);
1098+
1099+
merge(
1100+
mapperService,
1101+
mapping(
1102+
b -> b.startObject(fieldName)
1103+
.field("type", "semantic_text")
1104+
.field("inference_id", model.getInferenceEntityId())
1105+
.startObject("model_settings")
1106+
.field("task_type", TaskType.TEXT_EMBEDDING.toString())
1107+
.field("dimensions", model.getServiceSettings().dimensions())
1108+
.field("similarity", model.getServiceSettings().similarity())
1109+
.field("element_type", model.getServiceSettings().elementType())
1110+
.endObject()
1111+
.endObject()
1112+
)
1113+
);
1114+
assertSemanticTextField(mapperService, fieldName, true, null, null);
1115+
1116+
DocumentMapper documentMapper = mapperService.documentMapper();
1117+
DocumentParsingException e = assertThrows(
1118+
DocumentParsingException.class,
1119+
() -> documentMapper.parse(
1120+
source(
1121+
b -> addSemanticTextInferenceResults(
1122+
true,
1123+
b,
1124+
List.of(randomSemanticText(true, fieldName, model, null, List.of("foo", "bar"), XContentType.JSON))
1125+
)
1126+
)
1127+
)
1128+
);
1129+
assertThat(e.getCause(), instanceOf(UnsupportedOperationException.class));
1130+
assertThat(e.getCause().getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE));
1131+
}
1132+
1133+
public void testPre811IndexSemanticTextSparseVectorRaisesError() throws IOException {
1134+
Model model = TestModel.createRandomInstance(TaskType.SPARSE_EMBEDDING);
1135+
String fieldName = randomAlphaOfLength(8);
1136+
1137+
MapperService mapperService = createMapperService(
1138+
mapping(
1139+
b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", model.getInferenceEntityId()).endObject()
1140+
),
1141+
true,
1142+
IndexVersions.V_8_0_0,
1143+
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
1144+
);
1145+
assertSemanticTextField(mapperService, fieldName, false, null, null);
1146+
1147+
merge(
1148+
mapperService,
1149+
mapping(
1150+
b -> b.startObject(fieldName)
1151+
.field("type", "semantic_text")
1152+
.field("inference_id", model.getInferenceEntityId())
1153+
.startObject("model_settings")
1154+
.field("task_type", TaskType.SPARSE_EMBEDDING.toString())
1155+
.endObject()
1156+
.endObject()
1157+
)
1158+
);
1159+
assertSemanticTextField(mapperService, fieldName, true, null, null);
1160+
1161+
DocumentMapper documentMapper = mapperService.documentMapper();
1162+
DocumentParsingException e = assertThrows(
1163+
DocumentParsingException.class,
1164+
() -> documentMapper.parse(
1165+
source(
1166+
b -> addSemanticTextInferenceResults(
1167+
true,
1168+
b,
1169+
List.of(randomSemanticText(true, fieldName, model, null, List.of("foo", "bar"), XContentType.JSON))
1170+
)
1171+
)
1172+
)
1173+
);
1174+
assertThat(e.getCause(), instanceOf(UnsupportedOperationException.class));
1175+
assertThat(e.getCause().getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE));
1176+
}
1177+
11361178
private MapperService mapperServiceForFieldWithModelSettings(String fieldName, String inferenceId, MinimalServiceSettings modelSettings)
11371179
throws IOException {
11381180
return mapperServiceForFieldWithModelSettings(fieldName, inferenceId, null, modelSettings);

0 commit comments

Comments
 (0)