Skip to content

Commit 1917851

Browse files
kderusso, mromaios, and elasticsearchmachine
authored
[8.18] refactor(semantic_text): fail early in pre-8.11 indices (#133080) (#133582)
* refactor(semantic_text): fail early in pre-8.11 indices (#133080)
* fix(semantic_text): index underlying dense_vector field in older indices
* Update docs/changelog/133080.yaml
* [CI] Auto commit changes from spotless
* update msg, change versions, add ut
* indent
* remove todo
* [CI] Auto commit changes from spotless
* update msg
* [CI] Auto commit changes from spotless
* add densevectormapper indexed ut
* [CI] Auto commit changes from spotless
* fix ut
* undo accidental ut removal
* [CI] Auto commit changes from spotless
* Update docs/changelog/133080.yaml
  Co-authored-by: Kathleen DeRusso <[email protected]>
* remove sparse_vector exception change
* [CI] Auto commit changes from spotless
* reverting most of the stuff
* removing unused import, rename test
* [CI] Auto commit changes from spotless
* syntax
* revisit changelog
* revisit changelog
* Update docs/changelog/133080.yaml
  Co-authored-by: Kathleen DeRusso <[email protected]>
* Update x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
  Co-authored-by: Kathleen DeRusso <[email protected]>
* changelog format
* fix UT
---------
Co-authored-by: elasticsearchmachine <[email protected]>
Co-authored-by: Kathleen DeRusso <[email protected]>
(cherry picked from commit 8f41a4b)
# Conflicts:
# server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java
# x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
# x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java
* [CI] Auto commit changes from spotless
* Fix test
---------
Co-authored-by: Michail Romaios <[email protected]>
Co-authored-by: elasticsearchmachine <[email protected]>
1 parent 1461740 commit 1917851

File tree

5 files changed: 94 additions (+94) and 2 deletions (-2).

docs/changelog/133080.yaml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
pr: 133080
2+
summary: "Disallow creating `semantic_text` fields in indices created prior to 8.11.0"
3+
area: Relevance
4+
type: bug
5+
issues: []

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,7 @@
6363

6464
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
6565
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
66+
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION;
6667
import static org.hamcrest.Matchers.containsString;
6768
import static org.hamcrest.Matchers.equalTo;
6869
import static org.hamcrest.Matchers.instanceOf;
@@ -99,7 +100,7 @@ private void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws I
99100
if (elementType != ElementType.FLOAT) {
100101
b.field("element_type", elementType.toString());
101102
}
102-
if (indexVersion.onOrAfter(DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) {
103+
if (indexVersion.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) {
103104
// Serialize if it's new index version, or it was not the default for previous indices
104105
b.field("index", indexed);
105106
}

test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -521,7 +521,7 @@ public final void testMeta() throws IOException {
521521
);
522522
}
523523

524-
public final void testDeprecatedBoostWarning() throws IOException {
524+
public void testDeprecatedBoostWarning() throws IOException {
525525
try {
526526
createMapperService(DEPRECATED_BOOST_INDEX_VERSION, fieldMapping(b -> {
527527
minimalMapping(b, DEPRECATED_BOOST_INDEX_VERSION);

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,7 @@
8989
import java.util.function.BiConsumer;
9090
import java.util.function.Function;
9191

92+
import static org.elasticsearch.index.IndexVersions.NEW_SPARSE_VECTOR;
9293
import static org.elasticsearch.inference.TaskType.SPARSE_EMBEDDING;
9394
import static org.elasticsearch.inference.TaskType.TEXT_EMBEDDING;
9495
import static org.elasticsearch.search.SearchService.DEFAULT_SIZE;
@@ -112,6 +113,7 @@
112113
public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper {
113114
public static final NodeFeature SEMANTIC_TEXT_SEARCH_INFERENCE_ID = new NodeFeature("semantic_text.search_inference_id", true);
114115
public static final NodeFeature SEMANTIC_TEXT_DEFAULT_ELSER_2 = new NodeFeature("semantic_text.default_elser_2", true);
116+
public static final String UNSUPPORTED_INDEX_MESSAGE = "[semantic_text] is available on indices created with 8.11 or higher.";
115117
public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");
116118
public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix");
117119
public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix");
@@ -131,6 +133,9 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
131133

132134
public static BiConsumer<String, MappingParserContext> validateParserContext(String type) {
133135
return (n, c) -> {
136+
if (c.getIndexSettings().getIndexVersionCreated().before(NEW_SPARSE_VECTOR)) {
137+
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
138+
}
134139
if (InferenceMetadataFieldsMapper.isEnabled(c.getIndexSettings().getSettings()) == false) {
135140
notInMultiFields(type).accept(n, c);
136141
}

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java

Lines changed: 81 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,9 @@
9292
import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.getChunksFieldName;
9393
import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.getEmbeddingsFieldName;
9494
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_ELSER_2_INFERENCE_ID;
95+
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.UNSUPPORTED_INDEX_MESSAGE;
9596
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.randomSemanticText;
97+
import static org.hamcrest.Matchers.anyOf;
9698
import static org.hamcrest.Matchers.containsString;
9799
import static org.hamcrest.Matchers.equalTo;
98100
import static org.hamcrest.Matchers.instanceOf;
@@ -337,6 +339,57 @@ public void testInvalidTaskTypes() {
337339
}
338340
}
339341

342+
@Override
343+
protected IndexVersion boostNotAllowedIndexVersion() {
344+
return IndexVersions.NEW_SPARSE_VECTOR;
345+
}
346+
347+
public void testOldIndexSemanticTextDenseVectorRaisesError() throws IOException {
348+
final String fieldName = "field";
349+
final XContentBuilder fieldMapping = fieldMapping(b -> {
350+
b.field("type", "semantic_text");
351+
b.field(INFERENCE_ID_FIELD, "test_inference_id");
352+
b.startObject("model_settings");
353+
b.field("task_type", "text_embedding");
354+
b.field("dimensions", 384);
355+
b.field("similarity", "cosine");
356+
b.field("element_type", "float");
357+
b.endObject();
358+
});
359+
assertOldIndexUnsupported(fieldMapping);
360+
}
361+
362+
public void testOldIndexSemanticTextMinimalMappingRaisesError() throws IOException {
363+
final XContentBuilder fieldMapping = fieldMapping(this::minimalMapping);
364+
assertOldIndexUnsupported(fieldMapping);
365+
}
366+
367+
public void testOldIndexSemanticTextSparseVersionRaisesError() throws IOException {
368+
final XContentBuilder fieldMapping = fieldMapping(b -> {
369+
b.field("type", "semantic_text");
370+
b.field("inference_id", "another_inference_id");
371+
b.startObject("model_settings");
372+
b.field("task_type", "sparse_embedding");
373+
b.endObject();
374+
});
375+
assertOldIndexUnsupported(fieldMapping);
376+
}
377+
378+
private void assertOldIndexUnsupported(XContentBuilder fieldMapping) {
379+
380+
MapperParsingException exception = assertThrows(
381+
MapperParsingException.class,
382+
() -> createMapperService(
383+
fieldMapping,
384+
true,
385+
IndexVersions.V_8_0_0,
386+
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
387+
)
388+
);
389+
assertTrue(exception.getMessage().contains(UNSUPPORTED_INDEX_MESSAGE));
390+
assertTrue(exception.getRootCause() instanceof UnsupportedOperationException);
391+
}
392+
340393
public void testMultiFieldsSupport() throws IOException {
341394
if (useLegacyFormat) {
342395
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
@@ -973,6 +1026,34 @@ public void testExistsQueryDenseVector() throws IOException {
9731026
assertThat(existsQuery, instanceOf(ESToParentBlockJoinQuery.class));
9741027
}
9751028

1029+
/**
1030+
* Semantic text version error supersedes deprecated boost warning
1031+
* @throws IOException
1032+
*/
1033+
@Override
1034+
public void testDeprecatedBoostWarning() throws IOException {
1035+
try {
1036+
createMapperService(DEPRECATED_BOOST_INDEX_VERSION, fieldMapping(b -> {
1037+
minimalMapping(b, DEPRECATED_BOOST_INDEX_VERSION);
1038+
b.field("boost", 2.0);
1039+
}));
1040+
String[] warnings = Strings.concatStringArrays(
1041+
getParseMinimalWarnings(DEPRECATED_BOOST_INDEX_VERSION),
1042+
new String[] { "Parameter [boost] on field [field] is deprecated and has no effect" }
1043+
);
1044+
assertWarnings(warnings);
1045+
} catch (MapperParsingException e) {
1046+
assertThat(
1047+
e.getMessage(),
1048+
anyOf(
1049+
containsString(UNSUPPORTED_INDEX_MESSAGE),
1050+
containsString("Unknown parameter [boost]"),
1051+
containsString("[boost : 2.0]")
1052+
)
1053+
);
1054+
}
1055+
}
1056+
9761057
@Override
9771058
protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) {
9781059
// Until a doc is indexed, the query is rewritten as match no docs

0 commit comments

Comments
 (0)