Skip to content

Commit 5df1764

Browse files
committed
Defer Semantic Text Failures on Pre-8.11 Indices (elastic#135845)
1 parent 7f41867 commit 5df1764

File tree

4 files changed

+138
-65
lines changed

4 files changed

+138
-65
lines changed

docs/changelog/135845.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 135845
2+
summary: Fix for creating semantic_text fields on pre-8.11 indices crashing Elasticsearch
3+
area: Mapping
4+
type: bug
5+
issues: []

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@
6767
import static org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase.randomNormalizedVector;
6868
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.DYNAMIC_VISIT_RATIO;
6969
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE;
70-
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION;
7170
import static org.hamcrest.Matchers.containsString;
7271
import static org.hamcrest.Matchers.equalTo;
7372
import static org.hamcrest.Matchers.instanceOf;
@@ -106,7 +105,7 @@ private void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws I
106105
if (elementType != ElementType.FLOAT) {
107106
b.field("element_type", elementType.toString());
108107
}
109-
if (indexVersion.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) {
108+
if (indexVersion.onOrAfter(DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) {
110109
// Serialize if it's new index version, or it was not the default for previous indices
111110
b.field("index", indexed);
112111
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@
127127
*/
128128
public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper {
129129
private static final Logger logger = LogManager.getLogger(SemanticTextFieldMapper.class);
130-
public static final String UNSUPPORTED_INDEX_MESSAGE = "[semantic_text] is available on indices created with 8.11 or higher.";
130+
131131
public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");
132132
public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix");
133133
public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix");
@@ -153,6 +153,12 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
153153
public static final String CONTENT_TYPE = "semantic_text";
154154
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;
155155

156+
public static final String UNSUPPORTED_INDEX_MESSAGE = "["
157+
+ CONTENT_TYPE
158+
+ "] is available on indices created with 8.11 or higher. Please create a new index to use ["
159+
+ CONTENT_TYPE
160+
+ "]";
161+
156162
public static final float DEFAULT_RESCORE_OVERSAMPLE = 3.0f;
157163

158164
static final String INDEX_OPTIONS_FIELD = "index_options";
@@ -166,9 +172,6 @@ public static final TypeParser parser(Supplier<ModelRegistry> modelRegistry) {
166172

167173
public static BiConsumer<String, MappingParserContext> validateParserContext(String type) {
168174
return (n, c) -> {
169-
if (c.getIndexSettings().getIndexVersionCreated().before(NEW_SPARSE_VECTOR)) {
170-
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
171-
}
172175
if (InferenceMetadataFieldsMapper.isEnabled(c.getIndexSettings().getSettings()) == false) {
173176
notInMultiFields(type).accept(n, c);
174177
}
@@ -588,16 +591,33 @@ SemanticTextField parseSemanticTextField(DocumentParserContext context) throws I
588591
if (parser.currentToken() == XContentParser.Token.VALUE_NULL) {
589592
return null;
590593
}
594+
595+
SemanticTextField semanticTextField;
591596
boolean isWithinLeaf = context.path().isWithinLeafObject();
592597
try {
593598
context.path().setWithinLeafObject(true);
594-
return SemanticTextField.parse(
599+
semanticTextField = SemanticTextField.parse(
595600
context.parser(),
596601
new SemanticTextField.ParserContext(fieldType().useLegacyFormat, fullPath(), context.parser().contentType())
597602
);
598603
} finally {
599604
context.path().setWithinLeafObject(isWithinLeaf);
600605
}
606+
607+
IndexVersion indexCreatedVersion = context.indexSettings().getIndexVersionCreated();
608+
if (semanticTextField != null
609+
&& semanticTextField.inference().modelSettings() != null
610+
&& indexCreatedVersion.before(NEW_SPARSE_VECTOR)) {
611+
// Explicitly fail to parse semantic text fields that meet the following criteria:
612+
// - Are in pre-8.11 indices
613+
// - Have model settings, indicating that they have embeddings to be indexed
614+
//
615+
// We can't fail earlier than this because doing so causes pre-8.11 indices with semantic text fields to either go into a red state or
616+
// prevent Elasticsearch from starting.
617+
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
618+
}
619+
620+
return semanticTextField;
601621
}
602622

603623
void parseCreateFieldFromContext(DocumentParserContext context, SemanticTextField field, XContentLocation xContentLocation)
@@ -1301,13 +1321,20 @@ private static void configureDenseVectorMapperBuilder(
13011321
MinimalServiceSettings modelSettings,
13021322
SemanticTextIndexOptions indexOptions
13031323
) {
1304-
SimilarityMeasure similarity = modelSettings.similarity();
1305-
if (similarity != null) {
1306-
switch (similarity) {
1307-
case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE);
1308-
case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT);
1309-
case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM);
1310-
default -> throw new IllegalArgumentException("Unknown similarity measure in model_settings [" + similarity.name() + "]");
1324+
// Skip setting similarity on pre-8.11 indices. Setting it causes dense vector field creation to fail because similarity can only be set
1325+
// on indexed fields, and dense vector fields are not indexed by default prior to 8.11. The fact that the dense vector field is partially
1326+
// configured is moot because we will explicitly fail to index docs into this semantic text field anyway.
1327+
if (indexVersionCreated.onOrAfter(NEW_SPARSE_VECTOR)) {
1328+
SimilarityMeasure similarity = modelSettings.similarity();
1329+
if (similarity != null) {
1330+
switch (similarity) {
1331+
case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE);
1332+
case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT);
1333+
case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM);
1334+
default -> throw new IllegalArgumentException(
1335+
"Unknown similarity measure in model_settings [" + similarity.name() + "]"
1336+
);
1337+
}
13111338
}
13121339
}
13131340

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java

Lines changed: 93 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -415,57 +415,6 @@ public void testInvalidTaskTypes() {
415415
}
416416
}
417417

418-
@Override
419-
protected IndexVersion boostNotAllowedIndexVersion() {
420-
return IndexVersions.NEW_SPARSE_VECTOR;
421-
}
422-
423-
public void testOldIndexSemanticTextDenseVectorRaisesError() throws IOException {
424-
final String fieldName = "field";
425-
final XContentBuilder fieldMapping = fieldMapping(b -> {
426-
b.field("type", "semantic_text");
427-
b.field(INFERENCE_ID_FIELD, "test_inference_id");
428-
b.startObject("model_settings");
429-
b.field("task_type", "text_embedding");
430-
b.field("dimensions", 384);
431-
b.field("similarity", "cosine");
432-
b.field("element_type", "float");
433-
b.endObject();
434-
});
435-
assertOldIndexUnsupported(fieldMapping);
436-
}
437-
438-
public void testOldIndexSemanticTextMinimalMappingRaisesError() throws IOException {
439-
final XContentBuilder fieldMapping = fieldMapping(this::minimalMapping);
440-
assertOldIndexUnsupported(fieldMapping);
441-
}
442-
443-
public void testOldIndexSemanticTextSparseVersionRaisesError() throws IOException {
444-
final XContentBuilder fieldMapping = fieldMapping(b -> {
445-
b.field("type", "semantic_text");
446-
b.field("inference_id", "another_inference_id");
447-
b.startObject("model_settings");
448-
b.field("task_type", "sparse_embedding");
449-
b.endObject();
450-
});
451-
assertOldIndexUnsupported(fieldMapping);
452-
}
453-
454-
private void assertOldIndexUnsupported(XContentBuilder fieldMapping) {
455-
456-
MapperParsingException exception = assertThrows(
457-
MapperParsingException.class,
458-
() -> createMapperService(
459-
fieldMapping,
460-
true,
461-
IndexVersions.V_8_0_0,
462-
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
463-
)
464-
);
465-
assertTrue(exception.getMessage().contains(UNSUPPORTED_INDEX_MESSAGE));
466-
assertTrue(exception.getRootCause() instanceof UnsupportedOperationException);
467-
}
468-
469418
public void testMultiFieldsSupport() throws IOException {
470419
if (useLegacyFormat) {
471420
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
@@ -1265,6 +1214,99 @@ public void testModelSettingsRequiredWithChunks() throws IOException {
12651214
assertThat(ex.getMessage(), containsString("[model_settings] must be set for field [field] when chunks are provided"));
12661215
}
12671216

1217+
public void testPre811IndexSemanticTextDenseVectorRaisesError() throws IOException {
1218+
Model model = TestModel.createRandomInstance(TaskType.TEXT_EMBEDDING);
1219+
String fieldName = randomAlphaOfLength(8);
1220+
1221+
MapperService mapperService = createMapperService(
1222+
mapping(
1223+
b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", model.getInferenceEntityId()).endObject()
1224+
),
1225+
true,
1226+
IndexVersions.V_8_0_0,
1227+
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
1228+
);
1229+
assertSemanticTextField(mapperService, fieldName, false, null, null);
1230+
1231+
merge(
1232+
mapperService,
1233+
mapping(
1234+
b -> b.startObject(fieldName)
1235+
.field("type", "semantic_text")
1236+
.field("inference_id", model.getInferenceEntityId())
1237+
.startObject("model_settings")
1238+
.field("task_type", TaskType.TEXT_EMBEDDING.toString())
1239+
.field("dimensions", model.getServiceSettings().dimensions())
1240+
.field("similarity", model.getServiceSettings().similarity())
1241+
.field("element_type", model.getServiceSettings().elementType())
1242+
.endObject()
1243+
.endObject()
1244+
)
1245+
);
1246+
assertSemanticTextField(mapperService, fieldName, true, null, null);
1247+
1248+
DocumentMapper documentMapper = mapperService.documentMapper();
1249+
DocumentParsingException e = assertThrows(
1250+
DocumentParsingException.class,
1251+
() -> documentMapper.parse(
1252+
source(
1253+
b -> addSemanticTextInferenceResults(
1254+
true,
1255+
b,
1256+
List.of(randomSemanticText(true, fieldName, model, null, List.of("foo", "bar"), XContentType.JSON))
1257+
)
1258+
)
1259+
)
1260+
);
1261+
assertThat(e.getCause(), instanceOf(UnsupportedOperationException.class));
1262+
assertThat(e.getCause().getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE));
1263+
}
1264+
1265+
public void testPre811IndexSemanticTextSparseVectorRaisesError() throws IOException {
1266+
Model model = TestModel.createRandomInstance(TaskType.SPARSE_EMBEDDING);
1267+
String fieldName = randomAlphaOfLength(8);
1268+
1269+
MapperService mapperService = createMapperService(
1270+
mapping(
1271+
b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", model.getInferenceEntityId()).endObject()
1272+
),
1273+
true,
1274+
IndexVersions.V_8_0_0,
1275+
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
1276+
);
1277+
assertSemanticTextField(mapperService, fieldName, false, null, null);
1278+
1279+
merge(
1280+
mapperService,
1281+
mapping(
1282+
b -> b.startObject(fieldName)
1283+
.field("type", "semantic_text")
1284+
.field("inference_id", model.getInferenceEntityId())
1285+
.startObject("model_settings")
1286+
.field("task_type", TaskType.SPARSE_EMBEDDING.toString())
1287+
.endObject()
1288+
.endObject()
1289+
)
1290+
);
1291+
assertSemanticTextField(mapperService, fieldName, true, null, null);
1292+
1293+
DocumentMapper documentMapper = mapperService.documentMapper();
1294+
DocumentParsingException e = assertThrows(
1295+
DocumentParsingException.class,
1296+
() -> documentMapper.parse(
1297+
source(
1298+
b -> addSemanticTextInferenceResults(
1299+
true,
1300+
b,
1301+
List.of(randomSemanticText(true, fieldName, model, null, List.of("foo", "bar"), XContentType.JSON))
1302+
)
1303+
)
1304+
)
1305+
);
1306+
assertThat(e.getCause(), instanceOf(UnsupportedOperationException.class));
1307+
assertThat(e.getCause().getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE));
1308+
}
1309+
12681310
private MapperService mapperServiceForFieldWithModelSettings(String fieldName, String inferenceId, MinimalServiceSettings modelSettings)
12691311
throws IOException {
12701312
return mapperServiceForFieldWithModelSettings(fieldName, inferenceId, null, modelSettings);

0 commit comments

Comments
 (0)