From 474a4057ea277a3d3bc66b4b42e5df73aa8341cd Mon Sep 17 00:00:00 2001 From: Jordan Powers Date: Mon, 30 Jun 2025 14:40:19 -0700 Subject: [PATCH 1/3] Add index version for match_only_text stored field in binary format --- .../extras/MatchOnlyTextFieldMapper.java | 46 +++++++++++++++---- .../elasticsearch/index/IndexVersions.java | 1 + 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index 387477570ece0..1e993d3b3e88b 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -35,6 +35,7 @@ import org.elasticsearch.common.text.UTF8DecodingReader; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.fielddata.FieldDataContext; @@ -105,8 +106,15 @@ public static class Builder extends FieldMapper.Builder { private final TextParams.Analyzers analyzers; private final boolean withinMultiField; + private final boolean storedFieldInBinaryFormat; - public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean withinMultiField) { + public Builder( + String name, + IndexVersion indexCreatedVersion, + IndexAnalyzers indexAnalyzers, + boolean withinMultiField, + boolean storedFieldInBinaryFormat + ) { super(name); this.indexCreatedVersion = indexCreatedVersion; this.analyzers = new TextParams.Analyzers( @@ -116,6 +124,7 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers ind indexCreatedVersion ); 
this.withinMultiField = withinMultiField; + this.storedFieldInBinaryFormat = storedFieldInBinaryFormat; } @Override @@ -135,7 +144,8 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) { context.isSourceSynthetic(), meta.getValue(), withinMultiField, - multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() + multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField(), + storedFieldInBinaryFormat ); return ft; } @@ -156,7 +166,13 @@ public MatchOnlyTextFieldMapper build(MapperBuilderContext context) { } public static final TypeParser PARSER = new TypeParser( - (n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField()) + (n, c) -> new Builder( + n, + c.indexVersionCreated(), + c.getIndexAnalyzers(), + c.isWithinMultiField(), + c.indexVersionCreated().onOrAfter(IndexVersions.MATCH_ONLY_TEXT_STORED_AS_BYTES) + ) ); public static class MatchOnlyTextFieldType extends StringFieldType { @@ -167,6 +183,7 @@ public static class MatchOnlyTextFieldType extends StringFieldType { private final boolean withinMultiField; private final boolean hasCompatibleMultiFields; + private final boolean storedFieldInBinaryFormat; public MatchOnlyTextFieldType( String name, @@ -175,7 +192,8 @@ public MatchOnlyTextFieldType( boolean isSyntheticSource, Map meta, boolean withinMultiField, - boolean hasCompatibleMultiFields + boolean hasCompatibleMultiFields, + boolean storedFieldInBinaryFormat ) { super(name, true, false, false, tsi, meta); this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer); @@ -183,6 +201,7 @@ public MatchOnlyTextFieldType( this.originalName = isSyntheticSource ? 
name + "._original" : null; this.withinMultiField = withinMultiField; this.hasCompatibleMultiFields = hasCompatibleMultiFields; + this.storedFieldInBinaryFormat = storedFieldInBinaryFormat; } public MatchOnlyTextFieldType(String name) { @@ -193,6 +212,7 @@ public MatchOnlyTextFieldType(String name) { false, Collections.emptyMap(), false, + false, false ); } @@ -451,7 +471,11 @@ protected BytesRef toBytesRef(Object v) { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { if (textFieldType.isSyntheticSource()) { - return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource()); + if (storedFieldInBinaryFormat) { + return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource()); + } else { + return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource()); + } } SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name())); // MatchOnlyText never has norms, so we have to use the field names field @@ -502,6 +526,7 @@ private String storedFieldNameForSyntheticSource() { private final boolean storeSource; private final FieldType fieldType; private final boolean withinMultiField; + private final boolean storedFieldInBinaryFormat; private MatchOnlyTextFieldMapper( String simpleName, @@ -521,6 +546,7 @@ private MatchOnlyTextFieldMapper( this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue(); this.storeSource = storeSource; this.withinMultiField = builder.withinMultiField; + this.storedFieldInBinaryFormat = builder.storedFieldInBinaryFormat; } @Override @@ -530,7 +556,7 @@ public Map indexAnalyzers() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField).init(this); + return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField, storedFieldInBinaryFormat).init(this); } @Override @@ -547,8 +573,12 @@ 
protected void parseCreateField(DocumentParserContext context) throws IOExceptio context.addToFieldNames(fieldType().name()); if (storeSource) { - final var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length()); - context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), bytesRef)); + if (storedFieldInBinaryFormat) { + final var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length()); + context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), bytesRef)); + } else { + context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), value.string())); + } } } diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index 6ff33cf05d51f..ba150579540e9 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -178,6 +178,7 @@ private static Version parseUnchecked(String version) { public static final IndexVersion UPGRADE_TO_LUCENE_10_2_2 = def(9_030_0_00, Version.LUCENE_10_2_2); public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT = def(9_031_0_00, Version.LUCENE_10_2_2); public static final IndexVersion DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW = def(9_032_0_00, Version.LUCENE_10_2_2); + public static final IndexVersion MATCH_ONLY_TEXT_STORED_AS_BYTES = def(9_033_0_00, Version.LUCENE_10_2_2); /* * STOP! READ THIS FIRST! 
No, really, From 6213eec045118196b44d8eab04a14b9d49a89fff Mon Sep 17 00:00:00 2001 From: Jordan Powers Date: Tue, 1 Jul 2025 08:55:13 -0700 Subject: [PATCH 2/3] Add 8.x backport index version --- .../index/mapper/extras/MatchOnlyTextFieldMapper.java | 10 +++++++++- .../java/org/elasticsearch/index/IndexVersions.java | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index 1e993d3b3e88b..500b51415a453 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -165,13 +165,21 @@ public MatchOnlyTextFieldMapper build(MapperBuilderContext context) { } } + private static boolean isSyntheticSourceStoredFieldInBinaryFormat(IndexVersion indexCreatedVersion) { + return indexCreatedVersion.onOrAfter(IndexVersions.MATCH_ONLY_TEXT_STORED_AS_BYTES) + || indexCreatedVersion.between( + IndexVersions.MATCH_ONLY_TEXT_STORED_AS_BYTES_BACKPORT_8_X, + IndexVersions.UPGRADE_TO_LUCENE_10_0_0 + ); + } + public static final TypeParser PARSER = new TypeParser( (n, c) -> new Builder( n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField(), - c.indexVersionCreated().onOrAfter(IndexVersions.MATCH_ONLY_TEXT_STORED_AS_BYTES) + isSyntheticSourceStoredFieldInBinaryFormat(c.indexVersionCreated()) ) ); diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index ba150579540e9..2e464afa72b76 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -145,6 +145,7 @@ private static Version
parseUnchecked(String version) { public static final IndexVersion MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED_8_19 = def(8_533_0_00, Version.LUCENE_9_12_1); public static final IndexVersion UPGRADE_TO_LUCENE_9_12_2 = def(8_534_0_00, Version.LUCENE_9_12_2); public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X = def(8_535_0_00, Version.LUCENE_9_12_2); + public static final IndexVersion MATCH_ONLY_TEXT_STORED_AS_BYTES_BACKPORT_8_X = def(8_536_0_00, Version.LUCENE_9_12_2); public static final IndexVersion UPGRADE_TO_LUCENE_10_0_0 = def(9_000_0_00, Version.LUCENE_10_0_0); public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = def(9_001_0_00, Version.LUCENE_10_0_0); public static final IndexVersion TIME_BASED_K_ORDERED_DOC_ID = def(9_002_0_00, Version.LUCENE_10_0_0); From 65b4761dfd5e33c441f99f392aa8ae93d277eab5 Mon Sep 17 00:00:00 2001 From: Jordan Powers Date: Tue, 1 Jul 2025 09:03:58 -0700 Subject: [PATCH 3/3] Use older index version in testLoadSyntheticSourceFromStringOrBytesRef --- .../mapper/extras/MatchOnlyTextFieldMapperTests.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java index cfbf3a338f691..ef72e234f8d6b 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java @@ -26,8 +26,10 @@ import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Tuple; import org.elasticsearch.index.IndexSettings; +import 
org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.LuceneDocument; @@ -356,10 +358,14 @@ public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() thr } public void testLoadSyntheticSourceFromStringOrBytesRef() throws IOException { - DocumentMapper mapper = createSytheticSourceMapperService(mapping(b -> { + var mappings = mapping(b -> { b.startObject("field1").field("type", "match_only_text").endObject(); b.startObject("field2").field("type", "match_only_text").endObject(); - })).documentMapper(); + }); + var settings = Settings.builder().put("index.mapping.source.mode", "synthetic").build(); + DocumentMapper mapper = createMapperService(IndexVersions.UPGRADE_TO_LUCENE_10_2_2, settings, () -> true, mappings) + .documentMapper(); + try (Directory directory = newDirectory()) { RandomIndexWriter iw = indexWriterForSyntheticSource(directory);