Skip to content

Commit 7f58bdf

Browse files
kderusso and mromaios
authored and committed
[8.19] refactor(semantic_text): fail early in pre-8.11 indices (elastic#133080) (elastic#133580)
* refactor(semantic_text): fail early in pre-8.11 indices (elastic#133080)
* fix(semantic_text): index underlying dense_vector field in older indices
* Update docs/changelog/133080.yaml
* [CI] Auto commit changes from spotless
* update msg, change versions, add ut
* indent
* remove todo
* [CI] Auto commit changes from spotless
* update msg
* [CI] Auto commit changes from spotless
* add densevectormapper indexed ut
* [CI] Auto commit changes from spotless
* fix ut
* undo accidental ut removal
* [CI] Auto commit changes from spotless
* Update docs/changelog/133080.yaml
  Co-authored-by: Kathleen DeRusso <[email protected]>
* remove sparse_vector exception change
* [CI] Auto commit changes from spotless
* reverting most of the stuff
* removing unused import, rename test
* [CI] Auto commit changes from spotless
* syntax
* revisit changelog
* revisit changelog
* Update docs/changelog/133080.yaml
  Co-authored-by: Kathleen DeRusso <[email protected]>
* Update x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
  Co-authored-by: Kathleen DeRusso <[email protected]>
* changelog format
* fix UT
---------
Co-authored-by: elasticsearchmachine <[email protected]>
Co-authored-by: Kathleen DeRusso <[email protected]>
(cherry picked from commit 8f41a4b)
# Conflicts:
# server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java
# x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
* Fix test
---------
Co-authored-by: Michail Romaios <[email protected]>
1 parent 9ac0a22 commit 7f58bdf

File tree

5 files changed

+94
-2
lines changed

5 files changed

+94
-2
lines changed

docs/changelog/133080.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 133080
2+
summary: "Disallow creating `semantic_text` fields in indices created prior to 8.11.0"
3+
area: Relevance
4+
type: bug
5+
issues: []

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464

6565
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
6666
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
67+
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION;
6768
import static org.hamcrest.Matchers.containsString;
6869
import static org.hamcrest.Matchers.equalTo;
6970
import static org.hamcrest.Matchers.instanceOf;
@@ -100,7 +101,7 @@ private void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws I
100101
if (elementType != ElementType.FLOAT) {
101102
b.field("element_type", elementType.toString());
102103
}
103-
if (indexVersion.onOrAfter(DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) {
104+
if (indexVersion.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) {
104105
// Serialize if it's new index version, or it was not the default for previous indices
105106
b.field("index", indexed);
106107
}

test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -517,7 +517,7 @@ public final void testMeta() throws IOException {
517517
);
518518
}
519519

520-
public final void testDeprecatedBoostWarning() throws IOException {
520+
public void testDeprecatedBoostWarning() throws IOException {
521521
try {
522522
createMapperService(DEPRECATED_BOOST_INDEX_VERSION, fieldMapping(b -> {
523523
minimalMapping(b, DEPRECATED_BOOST_INDEX_VERSION);

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@
9898
import java.util.function.Function;
9999
import java.util.function.Supplier;
100100

101+
import static org.elasticsearch.index.IndexVersions.NEW_SPARSE_VECTOR;
101102
import static org.elasticsearch.index.IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ_BACKPORT_8_X;
102103
import static org.elasticsearch.inference.TaskType.SPARSE_EMBEDDING;
103104
import static org.elasticsearch.inference.TaskType.TEXT_EMBEDDING;
@@ -122,6 +123,7 @@
122123
*/
123124
public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper {
124125
private static final Logger logger = LogManager.getLogger(SemanticTextFieldMapper.class);
126+
public static final String UNSUPPORTED_INDEX_MESSAGE = "[semantic_text] is available on indices created with 8.11 or higher.";
125127
public static final NodeFeature SEMANTIC_TEXT_SEARCH_INFERENCE_ID = new NodeFeature("semantic_text.search_inference_id", true);
126128
public static final NodeFeature SEMANTIC_TEXT_DEFAULT_ELSER_2 = new NodeFeature("semantic_text.default_elser_2", true);
127129
public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");
@@ -156,6 +158,9 @@ public static final TypeParser parser(Supplier<ModelRegistry> modelRegistry) {
156158

157159
public static BiConsumer<String, MappingParserContext> validateParserContext(String type) {
158160
return (n, c) -> {
161+
if (c.getIndexSettings().getIndexVersionCreated().before(NEW_SPARSE_VECTOR)) {
162+
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
163+
}
159164
if (InferenceMetadataFieldsMapper.isEnabled(c.getIndexSettings().getSettings()) == false) {
160165
notInMultiFields(type).accept(n, c);
161166
}

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,11 @@
107107
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_ELSER_2_INFERENCE_ID;
108108
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_RESCORE_OVERSAMPLE;
109109
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.INDEX_OPTIONS_FIELD;
110+
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.UNSUPPORTED_INDEX_MESSAGE;
110111
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.generateRandomChunkingSettings;
111112
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.generateRandomChunkingSettingsOtherThan;
112113
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.randomSemanticText;
114+
import static org.hamcrest.Matchers.anyOf;
113115
import static org.hamcrest.Matchers.containsString;
114116
import static org.hamcrest.Matchers.equalTo;
115117
import static org.hamcrest.Matchers.instanceOf;
@@ -400,6 +402,57 @@ public void testInvalidTaskTypes() {
400402
}
401403
}
402404

405+
@Override
406+
protected IndexVersion boostNotAllowedIndexVersion() {
407+
return IndexVersions.NEW_SPARSE_VECTOR;
408+
}
409+
410+
public void testOldIndexSemanticTextDenseVectorRaisesError() throws IOException {
411+
final String fieldName = "field";
412+
final XContentBuilder fieldMapping = fieldMapping(b -> {
413+
b.field("type", "semantic_text");
414+
b.field(INFERENCE_ID_FIELD, "test_inference_id");
415+
b.startObject("model_settings");
416+
b.field("task_type", "text_embedding");
417+
b.field("dimensions", 384);
418+
b.field("similarity", "cosine");
419+
b.field("element_type", "float");
420+
b.endObject();
421+
});
422+
assertOldIndexUnsupported(fieldMapping);
423+
}
424+
425+
public void testOldIndexSemanticTextMinimalMappingRaisesError() throws IOException {
426+
final XContentBuilder fieldMapping = fieldMapping(this::minimalMapping);
427+
assertOldIndexUnsupported(fieldMapping);
428+
}
429+
430+
public void testOldIndexSemanticTextSparseVersionRaisesError() throws IOException {
431+
final XContentBuilder fieldMapping = fieldMapping(b -> {
432+
b.field("type", "semantic_text");
433+
b.field("inference_id", "another_inference_id");
434+
b.startObject("model_settings");
435+
b.field("task_type", "sparse_embedding");
436+
b.endObject();
437+
});
438+
assertOldIndexUnsupported(fieldMapping);
439+
}
440+
441+
private void assertOldIndexUnsupported(XContentBuilder fieldMapping) {
442+
443+
MapperParsingException exception = assertThrows(
444+
MapperParsingException.class,
445+
() -> createMapperService(
446+
fieldMapping,
447+
true,
448+
IndexVersions.V_8_0_0,
449+
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
450+
)
451+
);
452+
assertTrue(exception.getMessage().contains(UNSUPPORTED_INDEX_MESSAGE));
453+
assertTrue(exception.getRootCause() instanceof UnsupportedOperationException);
454+
}
455+
403456
public void testMultiFieldsSupport() throws IOException {
404457
if (useLegacyFormat) {
405458
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
@@ -1364,6 +1417,34 @@ public void testSpecifiedDenseVectorIndexOptions() throws IOException {
13641417

13651418
}
13661419

1420+
/**
1421+
* Semantic text version error supersedes deprecated boost warning
1422+
* @throws IOException
1423+
*/
1424+
@Override
1425+
public void testDeprecatedBoostWarning() throws IOException {
1426+
try {
1427+
createMapperService(DEPRECATED_BOOST_INDEX_VERSION, fieldMapping(b -> {
1428+
minimalMapping(b, DEPRECATED_BOOST_INDEX_VERSION);
1429+
b.field("boost", 2.0);
1430+
}));
1431+
String[] warnings = Strings.concatStringArrays(
1432+
getParseMinimalWarnings(DEPRECATED_BOOST_INDEX_VERSION),
1433+
new String[] { "Parameter [boost] on field [field] is deprecated and has no effect" }
1434+
);
1435+
assertWarnings(warnings);
1436+
} catch (MapperParsingException e) {
1437+
assertThat(
1438+
e.getMessage(),
1439+
anyOf(
1440+
containsString(UNSUPPORTED_INDEX_MESSAGE),
1441+
containsString("Unknown parameter [boost]"),
1442+
containsString("[boost : 2.0]")
1443+
)
1444+
);
1445+
}
1446+
}
1447+
13671448
public static SemanticTextIndexOptions randomSemanticTextIndexOptions() {
13681449
TaskType taskType = randomFrom(TaskType.SPARSE_EMBEDDING, TaskType.TEXT_EMBEDDING);
13691450
return randomSemanticTextIndexOptions(taskType);

0 commit comments

Comments
 (0)