Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/135845.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 135845
summary: Defer Semantic Text Failures on Pre-8.11 Indices
area: Mapping
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@
import static org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase.randomNormalizedVector;
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.DYNAMIC_VISIT_RATIO;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
Expand Down Expand Up @@ -106,7 +105,7 @@ private void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws I
if (elementType != ElementType.FLOAT) {
b.field("element_type", elementType.toString());
}
if (indexVersion.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) {
if (indexVersion.onOrAfter(DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) {
// Serialize if it's new index version, or it was not the default for previous indices
b.field("index", indexed);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@
*/
public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper {
private static final Logger logger = LogManager.getLogger(SemanticTextFieldMapper.class);
public static final String UNSUPPORTED_INDEX_MESSAGE = "[semantic_text] is available on indices created with 8.11 or higher.";

public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");
public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix");
public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix");
Expand All @@ -153,6 +153,12 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
public static final String CONTENT_TYPE = "semantic_text";
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;

public static final String UNSUPPORTED_INDEX_MESSAGE = "["
+ CONTENT_TYPE
+ "] is available on indices created with 8.11 or higher. Please create a new index to use ["
+ CONTENT_TYPE
+ "]";

public static final float DEFAULT_RESCORE_OVERSAMPLE = 3.0f;

static final String INDEX_OPTIONS_FIELD = "index_options";
Expand All @@ -166,9 +172,6 @@ public static final TypeParser parser(Supplier<ModelRegistry> modelRegistry) {

public static BiConsumer<String, MappingParserContext> validateParserContext(String type) {
return (n, c) -> {
if (c.getIndexSettings().getIndexVersionCreated().before(NEW_SPARSE_VECTOR)) {
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
}
if (InferenceMetadataFieldsMapper.isEnabled(c.getIndexSettings().getSettings()) == false) {
notInMultiFields(type).accept(n, c);
}
Expand Down Expand Up @@ -588,16 +591,33 @@ SemanticTextField parseSemanticTextField(DocumentParserContext context) throws I
if (parser.currentToken() == XContentParser.Token.VALUE_NULL) {
return null;
}

SemanticTextField semanticTextField;
boolean isWithinLeaf = context.path().isWithinLeafObject();
try {
context.path().setWithinLeafObject(true);
return SemanticTextField.parse(
semanticTextField = SemanticTextField.parse(
context.parser(),
new SemanticTextField.ParserContext(fieldType().useLegacyFormat, fullPath(), context.parser().contentType())
);
} finally {
context.path().setWithinLeafObject(isWithinLeaf);
}

IndexVersion indexCreatedVersion = context.indexSettings().getIndexVersionCreated();
if (semanticTextField != null
&& semanticTextField.inference().modelSettings() != null
&& indexCreatedVersion.before(NEW_SPARSE_VECTOR)) {
// Explicitly fail to parse semantic text fields that meet the following criteria:
// - Are in pre 8.11 indices
// - Have model settings, indicating that they have embeddings to be indexed
//
// We can't fail earlier than this because doing so causes pre-8.11 indices with semantic text fields to either end up in a red
// state or prevent Elasticsearch from starting.
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
}

return semanticTextField;
}

void parseCreateFieldFromContext(DocumentParserContext context, SemanticTextField field, XContentLocation xContentLocation)
Expand Down Expand Up @@ -1301,13 +1321,20 @@ private static void configureDenseVectorMapperBuilder(
MinimalServiceSettings modelSettings,
SemanticTextIndexOptions indexOptions
) {
SimilarityMeasure similarity = modelSettings.similarity();
if (similarity != null) {
switch (similarity) {
case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE);
case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT);
case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM);
default -> throw new IllegalArgumentException("Unknown similarity measure in model_settings [" + similarity.name() + "]");
// Skip setting similarity on pre 8.11 indices. It causes dense vector field creation to fail because similarity can only be set
// on indexed fields, which is not done by default prior to 8.11. The fact that the dense vector field is partially configured is
// moot because we will explicitly fail to index docs into this semantic text field anyways.
if (indexVersionCreated.onOrAfter(NEW_SPARSE_VECTOR)) {
SimilarityMeasure similarity = modelSettings.similarity();
if (similarity != null) {
switch (similarity) {
case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE);
case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT);
case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM);
default -> throw new IllegalArgumentException(
"Unknown similarity measure in model_settings [" + similarity.name() + "]"
);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -415,57 +415,6 @@ public void testInvalidTaskTypes() {
}
}

/**
 * Overrides the base test-case hook and reports {@link IndexVersions#NEW_SPARSE_VECTOR}.
 * NOTE(review): presumably the index version from which the base class expects field-level boost to be rejected;
 * confirm against the superclass contract.
 */
@Override
protected IndexVersion boostNotAllowedIndexVersion() {
return IndexVersions.NEW_SPARSE_VECTOR;
}

/**
 * Builds a {@code semantic_text} field mapping whose {@code model_settings} describe a dense
 * ({@code text_embedding}) model and hands it to {@link #assertOldIndexUnsupported} to check that
 * mapper creation is rejected.
 *
 * @throws IOException if the mapping cannot be built
 */
public void testOldIndexSemanticTextDenseVectorRaisesError() throws IOException {
    // The previously declared local "fieldName" was never read and has been dropped.
    final XContentBuilder fieldMapping = fieldMapping(b -> {
        b.field("type", "semantic_text");
        b.field(INFERENCE_ID_FIELD, "test_inference_id");
        b.startObject("model_settings");
        b.field("task_type", "text_embedding");
        b.field("dimensions", 384);
        b.field("similarity", "cosine");
        b.field("element_type", "float");
        b.endObject();
    });
    assertOldIndexUnsupported(fieldMapping);
}

/**
 * Checks that the minimal {@code semantic_text} mapping produced by {@code minimalMapping} is
 * rejected by {@link #assertOldIndexUnsupported}.
 *
 * @throws IOException if the mapping cannot be built
 */
public void testOldIndexSemanticTextMinimalMappingRaisesError() throws IOException {
    assertOldIndexUnsupported(fieldMapping(this::minimalMapping));
}

/**
 * Builds a {@code semantic_text} field mapping whose {@code model_settings} declare a
 * {@code sparse_embedding} task and checks via {@link #assertOldIndexUnsupported} that it is rejected.
 *
 * @throws IOException if the mapping cannot be built
 */
public void testOldIndexSemanticTextSparseVersionRaisesError() throws IOException {
    final XContentBuilder sparseMapping = fieldMapping(
        b -> b.field("type", "semantic_text")
            .field("inference_id", "another_inference_id")
            .startObject("model_settings")
            .field("task_type", "sparse_embedding")
            .endObject()
    );
    assertOldIndexUnsupported(sparseMapping);
}

/**
 * Asserts that creating a mapper service for {@code fieldMapping} fails with a
 * {@link MapperParsingException} whose message contains {@code UNSUPPORTED_INDEX_MESSAGE} and whose
 * root cause is an {@link UnsupportedOperationException}.
 * <p>
 * The mapper service is created with {@code IndexVersions.V_8_0_0} and the version preceding
 * {@code IndexVersions.NEW_SPARSE_VECTOR}. NOTE(review): presumably these bound the index-created
 * version chosen for the test; confirm against {@code createMapperService}.
 *
 * @param fieldMapping the field mapping expected to be rejected
 */
private void assertOldIndexUnsupported(XContentBuilder fieldMapping) {
    MapperParsingException exception = assertThrows(
        MapperParsingException.class,
        () -> createMapperService(
            fieldMapping,
            true,
            IndexVersions.V_8_0_0,
            IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
        )
    );
    // Matchers (rather than assertTrue on a boolean) report the actual message / cause type when the check fails.
    assertThat(exception.getMessage(), containsString(UNSUPPORTED_INDEX_MESSAGE));
    assertThat(exception.getRootCause(), instanceOf(UnsupportedOperationException.class));
}

public void testMultiFieldsSupport() throws IOException {
if (useLegacyFormat) {
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
Expand Down Expand Up @@ -1265,6 +1214,99 @@ public void testModelSettingsRequiredWithChunks() throws IOException {
assertThat(ex.getMessage(), containsString("[model_settings] must be set for field [field] when chunks are provided"));
}

/**
 * Exercises a {@code semantic_text} field backed by a text-embedding model on an index created before
 * {@code IndexVersions.NEW_SPARSE_VECTOR}:
 * <ol>
 *   <li>the mapping without {@code model_settings} is created and verified;</li>
 *   <li>a mapping update adding dense-vector {@code model_settings} is merged and verified;</li>
 *   <li>parsing a document that carries inference results must fail with a
 *       {@link DocumentParsingException} caused by an {@link UnsupportedOperationException}
 *       whose message equals {@code UNSUPPORTED_INDEX_MESSAGE}.</li>
 * </ol>
 *
 * @throws IOException if a mapping or source document cannot be built
 */
public void testPre811IndexSemanticTextDenseVectorRaisesError() throws IOException {
    Model model = TestModel.createRandomInstance(TaskType.TEXT_EMBEDDING);
    String fieldName = randomAlphaOfLength(8);
    IndexVersion lastUnsupportedVersion = IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR);

    // Step 1: field declared with only an inference id.
    MapperService service = createMapperService(mapping(b -> {
        b.startObject(fieldName);
        b.field("type", "semantic_text");
        b.field("inference_id", model.getInferenceEntityId());
        b.endObject();
    }), true, IndexVersions.V_8_0_0, lastUnsupportedVersion);
    assertSemanticTextField(service, fieldName, false, null, null);

    // Step 2: mapping update that supplies the dense-vector model settings.
    merge(service, mapping(b -> {
        b.startObject(fieldName);
        b.field("type", "semantic_text");
        b.field("inference_id", model.getInferenceEntityId());
        b.startObject("model_settings");
        b.field("task_type", TaskType.TEXT_EMBEDDING.toString());
        b.field("dimensions", model.getServiceSettings().dimensions());
        b.field("similarity", model.getServiceSettings().similarity());
        b.field("element_type", model.getServiceSettings().elementType());
        b.endObject();
        b.endObject();
    }));
    assertSemanticTextField(service, fieldName, true, null, null);

    // Step 3: indexing a document with inference results has to be refused.
    DocumentMapper mapper = service.documentMapper();
    DocumentParsingException failure = assertThrows(
        DocumentParsingException.class,
        () -> mapper.parse(
            source(
                b -> addSemanticTextInferenceResults(
                    true,
                    b,
                    List.of(randomSemanticText(true, fieldName, model, null, List.of("foo", "bar"), XContentType.JSON))
                )
            )
        )
    );
    Throwable cause = failure.getCause();
    assertThat(cause, instanceOf(UnsupportedOperationException.class));
    assertThat(cause.getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE));
}

/**
 * Exercises a {@code semantic_text} field backed by a sparse-embedding model on an index created before
 * {@code IndexVersions.NEW_SPARSE_VECTOR}:
 * <ol>
 *   <li>the mapping without {@code model_settings} is created and verified;</li>
 *   <li>a mapping update adding sparse {@code model_settings} is merged and verified;</li>
 *   <li>parsing a document that carries inference results must fail with a
 *       {@link DocumentParsingException} caused by an {@link UnsupportedOperationException}
 *       whose message equals {@code UNSUPPORTED_INDEX_MESSAGE}.</li>
 * </ol>
 *
 * @throws IOException if a mapping or source document cannot be built
 */
public void testPre811IndexSemanticTextSparseVectorRaisesError() throws IOException {
    Model model = TestModel.createRandomInstance(TaskType.SPARSE_EMBEDDING);
    String fieldName = randomAlphaOfLength(8);
    IndexVersion lastUnsupportedVersion = IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR);

    // Step 1: field declared with only an inference id.
    MapperService service = createMapperService(mapping(b -> {
        b.startObject(fieldName);
        b.field("type", "semantic_text");
        b.field("inference_id", model.getInferenceEntityId());
        b.endObject();
    }), true, IndexVersions.V_8_0_0, lastUnsupportedVersion);
    assertSemanticTextField(service, fieldName, false, null, null);

    // Step 2: mapping update that supplies the sparse model settings.
    merge(service, mapping(b -> {
        b.startObject(fieldName);
        b.field("type", "semantic_text");
        b.field("inference_id", model.getInferenceEntityId());
        b.startObject("model_settings");
        b.field("task_type", TaskType.SPARSE_EMBEDDING.toString());
        b.endObject();
        b.endObject();
    }));
    assertSemanticTextField(service, fieldName, true, null, null);

    // Step 3: indexing a document with inference results has to be refused.
    DocumentMapper mapper = service.documentMapper();
    DocumentParsingException failure = assertThrows(
        DocumentParsingException.class,
        () -> mapper.parse(
            source(
                b -> addSemanticTextInferenceResults(
                    true,
                    b,
                    List.of(randomSemanticText(true, fieldName, model, null, List.of("foo", "bar"), XContentType.JSON))
                )
            )
        )
    );
    Throwable cause = failure.getCause();
    assertThat(cause, instanceOf(UnsupportedOperationException.class));
    assertThat(cause.getMessage(), equalTo(UNSUPPORTED_INDEX_MESSAGE));
}

private MapperService mapperServiceForFieldWithModelSettings(String fieldName, String inferenceId, MinimalServiceSettings modelSettings)
throws IOException {
return mapperServiceForFieldWithModelSettings(fieldName, inferenceId, null, modelSettings);
Expand Down