Skip to content

Commit 393ea61

Browse files
authored
Semantic text with legacy format cannot store term vectors (#119690)
This change fixes the store option of the underlying sparse vector field for semantic text. For legacy indices, the store option should remain false, because changing this option on an existing index is not allowed and the term vectors are already stored in _source.
1 parent 7262fb3 commit 393ea61

File tree

2 files changed

+12
-4
lines changed

2 files changed

+12
-4
lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -858,7 +858,7 @@ private static NestedObjectMapper.Builder createChunksField(
858858
);
859859
chunksField.dynamic(ObjectMapper.Dynamic.FALSE);
860860
if (modelSettings != null) {
861-
chunksField.add(createEmbeddingsField(indexSettings.getIndexVersionCreated(), modelSettings));
861+
chunksField.add(createEmbeddingsField(indexSettings.getIndexVersionCreated(), modelSettings, useLegacyFormat));
862862
}
863863
if (useLegacyFormat) {
864864
var chunkTextField = new KeywordFieldMapper.Builder(TEXT_FIELD, indexVersionCreated).indexed(false).docValues(false);
@@ -869,9 +869,13 @@ private static NestedObjectMapper.Builder createChunksField(
869869
return chunksField;
870870
}
871871

872-
private static Mapper.Builder createEmbeddingsField(IndexVersion indexVersionCreated, SemanticTextField.ModelSettings modelSettings) {
872+
private static Mapper.Builder createEmbeddingsField(
873+
IndexVersion indexVersionCreated,
874+
SemanticTextField.ModelSettings modelSettings,
875+
boolean useLegacyFormat
876+
) {
873877
return switch (modelSettings.taskType()) {
874-
case SPARSE_EMBEDDING -> new SparseVectorFieldMapper.Builder(CHUNKED_EMBEDDINGS_FIELD).setStored(true);
878+
case SPARSE_EMBEDDING -> new SparseVectorFieldMapper.Builder(CHUNKED_EMBEDDINGS_FIELD).setStored(useLegacyFormat == false);
875879
case TEXT_EMBEDDING -> {
876880
DenseVectorFieldMapper.Builder denseVectorMapperBuilder = new DenseVectorFieldMapper.Builder(
877881
CHUNKED_EMBEDDINGS_FIELD,

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,11 @@ private static void assertSemanticTextField(MapperService mapperService, String
536536
assertTrue(embeddingsFieldMapper.fieldType() == mapperService.mappingLookup().getFieldType(getEmbeddingsFieldName(fieldName)));
537537
assertThat(embeddingsMapper.fullPath(), equalTo(getEmbeddingsFieldName(fieldName)));
538538
switch (semanticFieldMapper.fieldType().getModelSettings().taskType()) {
539-
case SPARSE_EMBEDDING -> assertThat(embeddingsMapper, instanceOf(SparseVectorFieldMapper.class));
539+
case SPARSE_EMBEDDING -> {
540+
assertThat(embeddingsMapper, instanceOf(SparseVectorFieldMapper.class));
541+
SparseVectorFieldMapper sparseMapper = (SparseVectorFieldMapper) embeddingsMapper;
542+
assertEquals(sparseMapper.fieldType().isStored(), semanticTextFieldType.useLegacyFormat() == false);
543+
}
540544
case TEXT_EMBEDDING -> assertThat(embeddingsMapper, instanceOf(DenseVectorFieldMapper.class));
541545
default -> throw new AssertionError("Invalid task type");
542546
}

0 commit comments

Comments
 (0)