Skip to content

Commit e28a43a

Browse files
mromaios, elasticsearchmachine, kderusso
authored
refactor(semantic_text): fail early in pre-8.11 indices (elastic#133080) (elastic#133577)
* fix(semantic_text): index underlying dense_vector field in older indices
* Update docs/changelog/133080.yaml
* [CI] Auto commit changes from spotless
* update msg, change versions, add ut
* indent
* remove todo
* [CI] Auto commit changes from spotless
* update msg
* [CI] Auto commit changes from spotless
* add densevectormapper indexed ut
* [CI] Auto commit changes from spotless
* fix ut
* undo accidental ut removal
* [CI] Auto commit changes from spotless
* Update docs/changelog/133080.yaml
* remove sparse_vector exception change
* [CI] Auto commit changes from spotless
* reverting most of the stuff
* removing unused import, rename test
* [CI] Auto commit changes from spotless
* syntax
* revisit changelog
* revisit changelog
* Update docs/changelog/133080.yaml
* Update x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
* changelog format
* fix UT

---------

Co-authored-by: elasticsearchmachine <[email protected]>
Co-authored-by: Kathleen DeRusso <[email protected]>
1 parent 6b0c351 commit e28a43a

File tree

4 files changed

+64
-1
lines changed

4 files changed

+64
-1
lines changed

docs/changelog/133080.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 133080
2+
summary: "Disallow creating `semantic_text` fields in indices created prior to 8.11.0"
3+
area: Relevance
4+
type: bug
5+
issues: []

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
import static org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase.randomNormalizedVector;
6969
import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.DYNAMIC_NPROBE;
7070
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE;
71+
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION;
7172
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.IVF_FORMAT;
7273
import static org.hamcrest.Matchers.containsString;
7374
import static org.hamcrest.Matchers.equalTo;
@@ -107,7 +108,7 @@ private void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws I
107108
if (elementType != ElementType.FLOAT) {
108109
b.field("element_type", elementType.toString());
109110
}
110-
if (indexVersion.onOrAfter(DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) {
111+
if (indexVersion.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) {
111112
// Serialize if it's new index version, or it was not the default for previous indices
112113
b.field("index", indexed);
113114
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@
9999
import java.util.function.Function;
100100
import java.util.function.Supplier;
101101

102+
import static org.elasticsearch.index.IndexVersions.NEW_SPARSE_VECTOR;
102103
import static org.elasticsearch.index.IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ;
103104
import static org.elasticsearch.index.IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ_BACKPORT_8_X;
104105
import static org.elasticsearch.inference.TaskType.SPARSE_EMBEDDING;
@@ -124,6 +125,7 @@
124125
*/
125126
public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper {
126127
private static final Logger logger = LogManager.getLogger(SemanticTextFieldMapper.class);
128+
public static final String UNSUPPORTED_INDEX_MESSAGE = "[semantic_text] is available on indices created with 8.11 or higher.";
127129
public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");
128130
public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix");
129131
public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix");
@@ -156,6 +158,9 @@ public static final TypeParser parser(Supplier<ModelRegistry> modelRegistry) {
156158

157159
public static BiConsumer<String, MappingParserContext> validateParserContext(String type) {
158160
return (n, c) -> {
161+
if (c.getIndexSettings().getIndexVersionCreated().before(NEW_SPARSE_VECTOR)) {
162+
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
163+
}
159164
if (InferenceMetadataFieldsMapper.isEnabled(c.getIndexSettings().getSettings()) == false) {
160165
notInMultiFields(type).accept(n, c);
161166
}

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@
107107
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_ELSER_2_INFERENCE_ID;
108108
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_RESCORE_OVERSAMPLE;
109109
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.INDEX_OPTIONS_FIELD;
110+
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.UNSUPPORTED_INDEX_MESSAGE;
110111
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.generateRandomChunkingSettings;
111112
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.generateRandomChunkingSettingsOtherThan;
112113
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.randomSemanticText;
@@ -402,6 +403,57 @@ public void testInvalidTaskTypes() {
402403
}
403404
}
404405

406+
/**
 * Overrides the inherited hook so that it reports {@code IndexVersions.NEW_SPARSE_VECTOR}.
 *
 * NOTE(review): presumably required because semantic_text mappings are rejected on indices created
 * before NEW_SPARSE_VECTOR, so the inherited boost checks cannot run against an older index version;
 * confirm against the base test class.
 */
@Override
407+
protected IndexVersion boostNotAllowedIndexVersion() {
408+
return IndexVersions.NEW_SPARSE_VECTOR;
409+
}
410+
411+
/**
 * Verifies that a {@code semantic_text} field whose {@code model_settings} describe a
 * {@code text_embedding} model (384 dimensions, cosine similarity, float elements) is rejected
 * by {@link #assertOldIndexUnsupported(XContentBuilder)}, i.e. on an index created before
 * {@code IndexVersions.NEW_SPARSE_VECTOR}.
 *
 * The previously declared local {@code fieldName} was never read and has been removed.
 */
public void testOldIndexSemanticTextDenseVectorRaisesError() throws IOException {
413+
final XContentBuilder fieldMapping = fieldMapping(b -> {
414+
b.field("type", "semantic_text");
415+
b.field(INFERENCE_ID_FIELD, "test_inference_id");
416+
b.startObject("model_settings");
417+
b.field("task_type", "text_embedding");
418+
b.field("dimensions", 384);
419+
b.field("similarity", "cosine");
420+
b.field("element_type", "float");
421+
b.endObject();
422+
});
423+
assertOldIndexUnsupported(fieldMapping);
424+
}
425+
426+
/**
 * Verifies that the mapping produced by {@code minimalMapping} is rejected by
 * {@link #assertOldIndexUnsupported(XContentBuilder)}.
 */
public void testOldIndexSemanticTextMinimalMappingRaisesError() throws IOException {
427+
final XContentBuilder fieldMapping = fieldMapping(this::minimalMapping);
428+
assertOldIndexUnsupported(fieldMapping);
429+
}
430+
431+
/**
 * Verifies that a {@code semantic_text} field whose {@code model_settings} declare the
 * {@code sparse_embedding} task type is rejected by
 * {@link #assertOldIndexUnsupported(XContentBuilder)}.
 */
public void testOldIndexSemanticTextSparseVersionRaisesError() throws IOException {
432+
final XContentBuilder fieldMapping = fieldMapping(b -> {
433+
b.field("type", "semantic_text");
434+
b.field("inference_id", "another_inference_id");
435+
b.startObject("model_settings");
436+
b.field("task_type", "sparse_embedding");
437+
b.endObject();
438+
});
439+
assertOldIndexUnsupported(fieldMapping);
440+
}
441+
442+
/**
 * Asserts that building a mapper service from {@code fieldMapping} fails with a
 * {@code MapperParsingException} whose message contains {@code UNSUPPORTED_INDEX_MESSAGE} and whose
 * root cause is an {@code UnsupportedOperationException}.
 *
 * @param fieldMapping the field mapping expected to be rejected
 */
private void assertOldIndexUnsupported(XContentBuilder fieldMapping) {
443+
444+
// NOTE(review): the literal `true` and the two versions are presumably the legacy-format flag and a
// min/max created-version range ending just before NEW_SPARSE_VECTOR; confirm against createMapperService.
MapperParsingException exception = assertThrows(
445+
MapperParsingException.class,
446+
() -> createMapperService(
447+
fieldMapping,
448+
true,
449+
IndexVersions.V_8_0_0,
450+
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
451+
)
452+
);
453+
// The user-facing message must be surfaced through the wrapping parsing exception.
assertTrue(exception.getMessage().contains(UNSUPPORTED_INDEX_MESSAGE));
454+
// The underlying failure must be the UnsupportedOperationException itself.
assertTrue(exception.getRootCause() instanceof UnsupportedOperationException);
455+
}
456+
405457
public void testMultiFieldsSupport() throws IOException {
406458
if (useLegacyFormat) {
407459
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {

0 commit comments

Comments
 (0)