Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/135845.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 135845
summary: Fix for creating semantic_text fields on pre-8.11 indices crashing Elasticsearch
area: Mapping
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
*/
public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper {
private static final Logger logger = LogManager.getLogger(SemanticTextFieldMapper.class);
public static final String UNSUPPORTED_INDEX_MESSAGE = "[semantic_text] is available on indices created with 8.11 or higher.";

public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");
public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix");
public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix");
Expand All @@ -127,16 +127,19 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
public static final String CONTENT_TYPE = "semantic_text";
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;

/**
 * Message of the {@link UnsupportedOperationException} raised when {@code semantic_text} is used on an index created before
 * {@code NEW_SPARSE_VECTOR}. The text is built from {@link #CONTENT_TYPE} and advises creating a new index.
 */
public static final String UNSUPPORTED_INDEX_MESSAGE = "["
+ CONTENT_TYPE
+ "] is available on indices created with 8.11 or higher. Please create a new index to use ["
+ CONTENT_TYPE
+ "]";

/**
 * Type parser for {@code semantic_text} fields. The factory lambda creates a {@link Builder} from the field name, the parser
 * context's {@code bitSetProducer} and its index settings; {@link #validateParserContext(String)} for {@link #CONTENT_TYPE} is
 * supplied as the only entry of the second constructor argument.
 * NOTE(review): presumably {@code TypeParser} runs that entry as a context validation step before building — confirm.
 */
public static final TypeParser PARSER = new TypeParser(
(n, c) -> new Builder(n, c::bitSetProducer, c.getIndexSettings()),
List.of(validateParserContext(CONTENT_TYPE))
);

public static BiConsumer<String, MappingParserContext> validateParserContext(String type) {
return (n, c) -> {
if (c.getIndexSettings().getIndexVersionCreated().before(NEW_SPARSE_VECTOR)) {
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
}
if (InferenceMetadataFieldsMapper.isEnabled(c.getIndexSettings().getSettings()) == false) {
notInMultiFields(type).accept(n, c);
}
Expand Down Expand Up @@ -380,16 +383,33 @@ SemanticTextField parseSemanticTextField(DocumentParserContext context) throws I
if (parser.currentToken() == XContentParser.Token.VALUE_NULL) {
return null;
}

SemanticTextField semanticTextField;
boolean isWithinLeaf = context.path().isWithinLeafObject();
try {
context.path().setWithinLeafObject(true);
return SemanticTextField.parse(
semanticTextField = SemanticTextField.parse(
context.parser(),
new SemanticTextField.ParserContext(fieldType().useLegacyFormat, fullPath(), context.parser().contentType())
);
} finally {
context.path().setWithinLeafObject(isWithinLeaf);
}

IndexVersion indexCreatedVersion = context.indexSettings().getIndexVersionCreated();
if (semanticTextField != null
&& semanticTextField.inference().modelSettings() != null
&& indexCreatedVersion.before(NEW_SPARSE_VECTOR)) {
// Explicitly fail to parse semantic text fields that meet both of the following criteria:
// - They are in pre-8.11 indices
// - They have model settings, indicating that they have embeddings to be indexed
//
// We can't fail earlier than this because failing earlier either puts pre-8.11 indices with semantic text fields into a red
// state or prevents Elasticsearch from starting.
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
}

return semanticTextField;
}

void parseCreateFieldFromContext(DocumentParserContext context, SemanticTextField field, XContentLocation xContentLocation)
Expand Down Expand Up @@ -965,17 +985,23 @@ private static Mapper.Builder createEmbeddingsField(
indexVersionCreated
);

SimilarityMeasure similarity = modelSettings.similarity();
if (similarity != null) {
switch (similarity) {
case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE);
case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT);
case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM);
default -> throw new IllegalArgumentException(
"Unknown similarity measure in model_settings [" + similarity.name() + "]"
);
// Skip setting similarity on pre-8.11 indices. Setting it causes dense vector field creation to fail because similarity can
// only be set on indexed fields, and dense vector fields are not indexed by default prior to 8.11. The fact that the dense
// vector field is only partially configured is moot because we explicitly fail to index docs into this semantic text field anyway.
if (indexVersionCreated.onOrAfter(NEW_SPARSE_VECTOR)) {
SimilarityMeasure similarity = modelSettings.similarity();
if (similarity != null) {
switch (similarity) {
case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE);
case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT);
case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM);
default -> throw new IllegalArgumentException(
"Unknown similarity measure in model_settings [" + similarity.name() + "]"
);
}
}
}

denseVectorMapperBuilder.dimensions(modelSettings.dimensions());
denseVectorMapperBuilder.elementType(modelSettings.elementType());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -340,57 +340,6 @@ public void testInvalidTaskTypes() {
}
}

/**
 * Supplies {@link IndexVersions#NEW_SPARSE_VECTOR} for the overridden {@code boostNotAllowedIndexVersion} hook.
 * NOTE(review): how the parent test class consumes this version is not visible here — confirm against the base class.
 */
@Override
protected IndexVersion boostNotAllowedIndexVersion() {
return IndexVersions.NEW_SPARSE_VECTOR;
}

/**
 * Verifies that a {@code semantic_text} mapping with dense-vector ({@code text_embedding}) model settings is rejected by
 * {@code assertOldIndexUnsupported}, i.e. when the mapper service is created for an old index version.
 */
public void testOldIndexSemanticTextDenseVectorRaisesError() throws IOException {
    // The mapping is written directly into the field definition; no separate field-name local is needed.
    final XContentBuilder fieldMapping = fieldMapping(b -> {
        b.field("type", "semantic_text");
        b.field(INFERENCE_ID_FIELD, "test_inference_id");
        b.startObject("model_settings");
        b.field("task_type", "text_embedding");
        b.field("dimensions", 384);
        b.field("similarity", "cosine");
        b.field("element_type", "float");
        b.endObject();
    });
    assertOldIndexUnsupported(fieldMapping);
}

/**
 * Runs the old-index rejection check against the minimal {@code semantic_text} mapping produced by {@code minimalMapping}.
 */
public void testOldIndexSemanticTextMinimalMappingRaisesError() throws IOException {
    assertOldIndexUnsupported(fieldMapping(this::minimalMapping));
}

/**
 * Runs the old-index rejection check against a {@code semantic_text} mapping whose model settings declare the
 * {@code sparse_embedding} task type.
 */
public void testOldIndexSemanticTextSparseVersionRaisesError() throws IOException {
    final XContentBuilder sparseMapping = fieldMapping(
        builder -> builder.field("type", "semantic_text")
            .field("inference_id", "another_inference_id")
            .startObject("model_settings")
            .field("task_type", "sparse_embedding")
            .endObject()
    );
    assertOldIndexUnsupported(sparseMapping);
}

/**
 * Asserts that creating a mapper service for the given field mapping fails when {@link IndexVersions#V_8_0_0} and the version
 * immediately preceding {@link IndexVersions#NEW_SPARSE_VECTOR} are passed as the index version arguments.
 * <p>
 * The failure must be a {@link MapperParsingException} whose message contains {@code UNSUPPORTED_INDEX_MESSAGE} and whose root
 * cause is an {@link UnsupportedOperationException}.
 *
 * @param fieldMapping the mapping expected to be rejected
 */
private void assertOldIndexUnsupported(XContentBuilder fieldMapping) {
    MapperParsingException exception = assertThrows(
        MapperParsingException.class,
        () -> createMapperService(
            fieldMapping,
            true,
            IndexVersions.V_8_0_0,
            IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
        )
    );
    // Matcher-based assertions report the actual message / cause type on failure, unlike a bare assertTrue.
    assertThat(exception.getMessage(), containsString(UNSUPPORTED_INDEX_MESSAGE));
    assertThat(exception.getRootCause(), instanceOf(UnsupportedOperationException.class));
}

public void testMultiFieldsSupport() throws IOException {
if (useLegacyFormat) {
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
Expand Down Expand Up @@ -944,6 +893,99 @@ public void testModelSettingsRequiredWithChunks() throws IOException {
assertThat(ex.getMessage(), containsString("[model_settings] must be set for field [field] when chunks are provided"));
}

/**
 * Dense-vector variant: a {@code semantic_text} field can be mapped (first without, then with {@code text_embedding} model
 * settings) when the mapper service is created with old index versions, but parsing a document that carries inference results
 * for the field must fail with an {@link UnsupportedOperationException} whose message equals {@code UNSUPPORTED_INDEX_MESSAGE}.
 */
public void testPre811IndexSemanticTextDenseVectorRaisesError() throws IOException {
    Model model = TestModel.createRandomInstance(TaskType.TEXT_EMBEDDING);
    String fieldName = randomAlphaOfLength(8);
    String inferenceId = model.getInferenceEntityId();
    IndexVersion versionBeforeSparseVector = IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR);

    // Step 1: the bare field definition (no model settings yet) is accepted.
    MapperService service = createMapperService(mapping(b -> {
        b.startObject(fieldName);
        b.field("type", "semantic_text");
        b.field("inference_id", inferenceId);
        b.endObject();
    }), true, IndexVersions.V_8_0_0, versionBeforeSparseVector);
    assertSemanticTextField(service, fieldName, false);

    // Step 2: merging in the dense-vector model settings is accepted as well.
    merge(service, mapping(b -> {
        b.startObject(fieldName);
        b.field("type", "semantic_text");
        b.field("inference_id", inferenceId);
        b.startObject("model_settings");
        b.field("task_type", TaskType.TEXT_EMBEDDING.toString());
        b.field("dimensions", model.getServiceSettings().dimensions());
        b.field("similarity", model.getServiceSettings().similarity());
        b.field("element_type", model.getServiceSettings().elementType());
        b.endObject();
        b.endObject();
    }));
    assertSemanticTextField(service, fieldName, true);

    // Step 3: parsing a document with inference results for the field must be rejected.
    DocumentMapper mapper = service.documentMapper();
    DocumentParsingException failure = assertThrows(
        DocumentParsingException.class,
        () -> mapper.parse(
            source(
                b -> addSemanticTextInferenceResults(
                    true,
                    b,
                    List.of(randomSemanticText(true, fieldName, model, List.of("foo", "bar"), XContentType.JSON))
                )
            )
        )
    );
    Throwable cause = failure.getCause();
    assertThat(cause, instanceOf(UnsupportedOperationException.class));
    assertThat(cause.getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE));
}

/**
 * Sparse-vector variant: a {@code semantic_text} field can be mapped (first without, then with {@code sparse_embedding} model
 * settings) when the mapper service is created with old index versions, but parsing a document that carries inference results
 * for the field must fail with an {@link UnsupportedOperationException} whose message equals {@code UNSUPPORTED_INDEX_MESSAGE}.
 */
public void testPre811IndexSemanticTextSparseVectorRaisesError() throws IOException {
    Model model = TestModel.createRandomInstance(TaskType.SPARSE_EMBEDDING);
    String fieldName = randomAlphaOfLength(8);
    String inferenceId = model.getInferenceEntityId();
    IndexVersion versionBeforeSparseVector = IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR);

    // Step 1: the bare field definition (no model settings yet) is accepted.
    MapperService service = createMapperService(mapping(b -> {
        b.startObject(fieldName);
        b.field("type", "semantic_text");
        b.field("inference_id", inferenceId);
        b.endObject();
    }), true, IndexVersions.V_8_0_0, versionBeforeSparseVector);
    assertSemanticTextField(service, fieldName, false);

    // Step 2: merging in the sparse-embedding model settings is accepted as well.
    merge(service, mapping(b -> {
        b.startObject(fieldName);
        b.field("type", "semantic_text");
        b.field("inference_id", inferenceId);
        b.startObject("model_settings");
        b.field("task_type", TaskType.SPARSE_EMBEDDING.toString());
        b.endObject();
        b.endObject();
    }));
    assertSemanticTextField(service, fieldName, true);

    // Step 3: parsing a document with inference results for the field must be rejected.
    DocumentMapper mapper = service.documentMapper();
    DocumentParsingException failure = assertThrows(
        DocumentParsingException.class,
        () -> mapper.parse(
            source(
                b -> addSemanticTextInferenceResults(
                    true,
                    b,
                    List.of(randomSemanticText(true, fieldName, model, List.of("foo", "bar"), XContentType.JSON))
                )
            )
        )
    );
    Throwable cause = failure.getCause();
    assertThat(cause, instanceOf(UnsupportedOperationException.class));
    assertThat(cause.getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE));
}

private MapperService mapperServiceForFieldWithModelSettings(String fieldName, String inferenceId, MinimalServiceSettings modelSettings)
throws IOException {
return mapperServiceForFieldWithModelSettings(fieldName, inferenceId, null, modelSettings);
Expand Down