Skip to content

Commit 90ee222

Browse files
Mikep86 and elasticsearchmachine authored
[9.1] Defer Semantic Text Failures on Pre-8.11 Indices (#135845) (#135869)
* Defer Semantic Text Failures on Pre-8.11 Indices (#135845)

  (cherry picked from commit 66d9241)

  # Conflicts:
  #   server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java
  #   x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

* [CI] Auto commit changes from spotless

* Update comment

---------

Co-authored-by: elasticsearchmachine <[email protected]>
1 parent 57155e1 commit 90ee222

File tree

4 files changed

+139
-67
lines changed

4 files changed

+139
-67
lines changed

docs/changelog/135845.yaml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
pr: 135845
2+
summary: Fix for creating semantic_text fields on pre-8.11 indices crashing Elasticsearch
3+
area: Mapping
4+
type: bug
5+
issues: []

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,6 @@
6868
import static org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase.randomNormalizedVector;
6969
import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.DYNAMIC_NPROBE;
7070
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE;
71-
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION;
7271
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.IVF_FORMAT;
7372
import static org.hamcrest.Matchers.containsString;
7473
import static org.hamcrest.Matchers.equalTo;
@@ -108,7 +107,7 @@ private void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws I
108107
if (elementType != ElementType.FLOAT) {
109108
b.field("element_type", elementType.toString());
110109
}
111-
if (indexVersion.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) {
110+
if (indexVersion.onOrAfter(DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) {
112111
// Serialize if it's new index version, or it was not the default for previous indices
113112
b.field("index", indexed);
114113
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 40 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -125,7 +125,7 @@
125125
*/
126126
public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper {
127127
private static final Logger logger = LogManager.getLogger(SemanticTextFieldMapper.class);
128-
public static final String UNSUPPORTED_INDEX_MESSAGE = "[semantic_text] is available on indices created with 8.11 or higher.";
128+
129129
public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");
130130
public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix");
131131
public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix");
@@ -145,6 +145,12 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
145145
public static final String CONTENT_TYPE = "semantic_text";
146146
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;
147147

148+
public static final String UNSUPPORTED_INDEX_MESSAGE = "["
149+
+ CONTENT_TYPE
150+
+ "] is available on indices created with 8.11 or higher. Please create a new index to use ["
151+
+ CONTENT_TYPE
152+
+ "]";
153+
148154
public static final float DEFAULT_RESCORE_OVERSAMPLE = 3.0f;
149155

150156
static final String INDEX_OPTIONS_FIELD = "index_options";
@@ -158,9 +164,6 @@ public static final TypeParser parser(Supplier<ModelRegistry> modelRegistry) {
158164

159165
public static BiConsumer<String, MappingParserContext> validateParserContext(String type) {
160166
return (n, c) -> {
161-
if (c.getIndexSettings().getIndexVersionCreated().before(NEW_SPARSE_VECTOR)) {
162-
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
163-
}
164167
if (InferenceMetadataFieldsMapper.isEnabled(c.getIndexSettings().getSettings()) == false) {
165168
notInMultiFields(type).accept(n, c);
166169
}
@@ -548,16 +551,33 @@ SemanticTextField parseSemanticTextField(DocumentParserContext context) throws I
548551
if (parser.currentToken() == XContentParser.Token.VALUE_NULL) {
549552
return null;
550553
}
554+
555+
SemanticTextField semanticTextField;
551556
boolean isWithinLeaf = context.path().isWithinLeafObject();
552557
try {
553558
context.path().setWithinLeafObject(true);
554-
return SemanticTextField.parse(
559+
semanticTextField = SemanticTextField.parse(
555560
context.parser(),
556561
new SemanticTextField.ParserContext(fieldType().useLegacyFormat, fullPath(), context.parser().contentType())
557562
);
558563
} finally {
559564
context.path().setWithinLeafObject(isWithinLeaf);
560565
}
566+
567+
IndexVersion indexCreatedVersion = context.indexSettings().getIndexVersionCreated();
568+
if (semanticTextField != null
569+
&& semanticTextField.inference().modelSettings() != null
570+
&& indexCreatedVersion.before(NEW_SPARSE_VECTOR)) {
571+
// Explicitly fail to parse semantic text fields that meet the following criteria:
572+
// - Are in pre 8.11 indices
573+
// - Have model settings, indicating that they have embeddings to be indexed
574+
//
575+
// We can't fail earlier than this because it causes pre 8.11 indices with semantic text fields to either be in red state or
576+
// cause Elasticsearch to not launch.
577+
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
578+
}
579+
580+
return semanticTextField;
561581
}
562582

563583
void parseCreateFieldFromContext(DocumentParserContext context, SemanticTextField field, XContentLocation xContentLocation)
@@ -1158,17 +1178,23 @@ private static Mapper.Builder createEmbeddingsField(
11581178
indexVersionCreated
11591179
);
11601180

1161-
SimilarityMeasure similarity = modelSettings.similarity();
1162-
if (similarity != null) {
1163-
switch (similarity) {
1164-
case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE);
1165-
case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT);
1166-
case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM);
1167-
default -> throw new IllegalArgumentException(
1168-
"Unknown similarity measure in model_settings [" + similarity.name() + "]"
1169-
);
1181+
// Skip setting similarity on pre 8.11 indices. It causes dense vector field creation to fail because similarity can only be
1182+
// set on indexed fields, which is not done by default prior to 8.11. The fact that the dense vector field is partially
1183+
// configured is moot because we will explicitly fail to index docs into this semantic text field anyways.
1184+
if (indexVersionCreated.onOrAfter(NEW_SPARSE_VECTOR)) {
1185+
SimilarityMeasure similarity = modelSettings.similarity();
1186+
if (similarity != null) {
1187+
switch (similarity) {
1188+
case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE);
1189+
case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT);
1190+
case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM);
1191+
default -> throw new IllegalArgumentException(
1192+
"Unknown similarity measure in model_settings [" + similarity.name() + "]"
1193+
);
1194+
}
11701195
}
11711196
}
1197+
11721198
denseVectorMapperBuilder.dimensions(modelSettings.dimensions());
11731199
denseVectorMapperBuilder.elementType(modelSettings.elementType());
11741200
if (indexOptions != null) {

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java

Lines changed: 93 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -403,57 +403,6 @@ public void testInvalidTaskTypes() {
403403
}
404404
}
405405

406-
@Override
407-
protected IndexVersion boostNotAllowedIndexVersion() {
408-
return IndexVersions.NEW_SPARSE_VECTOR;
409-
}
410-
411-
public void testOldIndexSemanticTextDenseVectorRaisesError() throws IOException {
412-
final String fieldName = "field";
413-
final XContentBuilder fieldMapping = fieldMapping(b -> {
414-
b.field("type", "semantic_text");
415-
b.field(INFERENCE_ID_FIELD, "test_inference_id");
416-
b.startObject("model_settings");
417-
b.field("task_type", "text_embedding");
418-
b.field("dimensions", 384);
419-
b.field("similarity", "cosine");
420-
b.field("element_type", "float");
421-
b.endObject();
422-
});
423-
assertOldIndexUnsupported(fieldMapping);
424-
}
425-
426-
public void testOldIndexSemanticTextMinimalMappingRaisesError() throws IOException {
427-
final XContentBuilder fieldMapping = fieldMapping(this::minimalMapping);
428-
assertOldIndexUnsupported(fieldMapping);
429-
}
430-
431-
public void testOldIndexSemanticTextSparseVersionRaisesError() throws IOException {
432-
final XContentBuilder fieldMapping = fieldMapping(b -> {
433-
b.field("type", "semantic_text");
434-
b.field("inference_id", "another_inference_id");
435-
b.startObject("model_settings");
436-
b.field("task_type", "sparse_embedding");
437-
b.endObject();
438-
});
439-
assertOldIndexUnsupported(fieldMapping);
440-
}
441-
442-
private void assertOldIndexUnsupported(XContentBuilder fieldMapping) {
443-
444-
MapperParsingException exception = assertThrows(
445-
MapperParsingException.class,
446-
() -> createMapperService(
447-
fieldMapping,
448-
true,
449-
IndexVersions.V_8_0_0,
450-
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
451-
)
452-
);
453-
assertTrue(exception.getMessage().contains(UNSUPPORTED_INDEX_MESSAGE));
454-
assertTrue(exception.getRootCause() instanceof UnsupportedOperationException);
455-
}
456-
457406
public void testMultiFieldsSupport() throws IOException {
458407
if (useLegacyFormat) {
459408
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
@@ -1134,6 +1083,99 @@ public void testModelSettingsRequiredWithChunks() throws IOException {
11341083
assertThat(ex.getMessage(), containsString("[model_settings] must be set for field [field] when chunks are provided"));
11351084
}
11361085

1086+
public void testPre811IndexSemanticTextDenseVectorRaisesError() throws IOException {
1087+
Model model = TestModel.createRandomInstance(TaskType.TEXT_EMBEDDING);
1088+
String fieldName = randomAlphaOfLength(8);
1089+
1090+
MapperService mapperService = createMapperService(
1091+
mapping(
1092+
b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", model.getInferenceEntityId()).endObject()
1093+
),
1094+
true,
1095+
IndexVersions.V_8_0_0,
1096+
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
1097+
);
1098+
assertSemanticTextField(mapperService, fieldName, false, null, null);
1099+
1100+
merge(
1101+
mapperService,
1102+
mapping(
1103+
b -> b.startObject(fieldName)
1104+
.field("type", "semantic_text")
1105+
.field("inference_id", model.getInferenceEntityId())
1106+
.startObject("model_settings")
1107+
.field("task_type", TaskType.TEXT_EMBEDDING.toString())
1108+
.field("dimensions", model.getServiceSettings().dimensions())
1109+
.field("similarity", model.getServiceSettings().similarity())
1110+
.field("element_type", model.getServiceSettings().elementType())
1111+
.endObject()
1112+
.endObject()
1113+
)
1114+
);
1115+
assertSemanticTextField(mapperService, fieldName, true, null, null);
1116+
1117+
DocumentMapper documentMapper = mapperService.documentMapper();
1118+
DocumentParsingException e = assertThrows(
1119+
DocumentParsingException.class,
1120+
() -> documentMapper.parse(
1121+
source(
1122+
b -> addSemanticTextInferenceResults(
1123+
true,
1124+
b,
1125+
List.of(randomSemanticText(true, fieldName, model, null, List.of("foo", "bar"), XContentType.JSON))
1126+
)
1127+
)
1128+
)
1129+
);
1130+
assertThat(e.getCause(), instanceOf(UnsupportedOperationException.class));
1131+
assertThat(e.getCause().getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE));
1132+
}
1133+
1134+
public void testPre811IndexSemanticTextSparseVectorRaisesError() throws IOException {
1135+
Model model = TestModel.createRandomInstance(TaskType.SPARSE_EMBEDDING);
1136+
String fieldName = randomAlphaOfLength(8);
1137+
1138+
MapperService mapperService = createMapperService(
1139+
mapping(
1140+
b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", model.getInferenceEntityId()).endObject()
1141+
),
1142+
true,
1143+
IndexVersions.V_8_0_0,
1144+
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
1145+
);
1146+
assertSemanticTextField(mapperService, fieldName, false, null, null);
1147+
1148+
merge(
1149+
mapperService,
1150+
mapping(
1151+
b -> b.startObject(fieldName)
1152+
.field("type", "semantic_text")
1153+
.field("inference_id", model.getInferenceEntityId())
1154+
.startObject("model_settings")
1155+
.field("task_type", TaskType.SPARSE_EMBEDDING.toString())
1156+
.endObject()
1157+
.endObject()
1158+
)
1159+
);
1160+
assertSemanticTextField(mapperService, fieldName, true, null, null);
1161+
1162+
DocumentMapper documentMapper = mapperService.documentMapper();
1163+
DocumentParsingException e = assertThrows(
1164+
DocumentParsingException.class,
1165+
() -> documentMapper.parse(
1166+
source(
1167+
b -> addSemanticTextInferenceResults(
1168+
true,
1169+
b,
1170+
List.of(randomSemanticText(true, fieldName, model, null, List.of("foo", "bar"), XContentType.JSON))
1171+
)
1172+
)
1173+
)
1174+
);
1175+
assertThat(e.getCause(), instanceOf(UnsupportedOperationException.class));
1176+
assertThat(e.getCause().getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE));
1177+
}
1178+
11371179
private MapperService mapperServiceForFieldWithModelSettings(String fieldName, String inferenceId, MinimalServiceSettings modelSettings)
11381180
throws IOException {
11391181
return mapperServiceForFieldWithModelSettings(fieldName, inferenceId, null, modelSettings);

0 commit comments

Comments
 (0)