From e3509c8a8e280d6258aedeee303909eb36378e2a Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 6 Oct 2025 12:09:19 +0200 Subject: [PATCH 01/15] Introduce `index.mapping.use_binary_doc_values` to experiment with binary doc values for keyword field. --- .../esql/ValuesSourceReaderBenchmark.java | 4 +- .../extras/MatchOnlyTextFieldTypeTests.java | 8 +- .../common/settings/IndexScopedSettings.java | 1 + .../elasticsearch/index/IndexSettings.java | 7 + .../index/mapper/BlockDocValuesReader.java | 22 +++ .../index/mapper/KeywordFieldMapper.java | 157 +++++++++++++++--- .../index/mapper/KeywordFieldTypeTests.java | 21 ++- .../index/mapper/TextFieldTypeTests.java | 6 +- .../ValueSourceReaderTypeConversionTests.java | 4 +- .../read/ValuesSourceReaderOperatorTests.java | 4 +- .../planner/EsPhysicalOperationProviders.java | 3 +- .../test/keyword_use_binary_doc_values.yml | 117 +++++++++++++ 12 files changed, 307 insertions(+), 47 deletions(-) create mode 100644 x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/keyword_use_binary_doc_values.yml diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java index 9cc304828a8a7..b9756c267e0b5 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java @@ -222,8 +222,8 @@ private static BlockLoader blockLoader(String name) { Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER, new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(ft.docValuesType() != DocValuesType.NONE), - syntheticSource - ).blockLoader(new MappedFieldType.BlockLoaderContext() { + syntheticSource, + useBinaryDocValues).blockLoader(new MappedFieldType.BlockLoaderContext() { @Override public String indexName() { 
return "benchmark"; diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java index 8e70945fc2a76..bbbdacea062e3 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java @@ -298,8 +298,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true - ); + true, + useBinaryDocValues); MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType( "parent", @@ -346,8 +346,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true - ); + true, + useBinaryDocValues); MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType( "parent", diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index d4fef4e9bb489..9de1f103d85f5 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -243,6 +243,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { if (IndexSettings.DOC_VALUES_SKIPPER) { settings.add(IndexSettings.USE_DOC_VALUES_SKIPPER); } + settings.add(IndexSettings.USE_BINARY_DOC_VALUES); settings.add(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING); BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(settings); }; diff --git 
a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index b396e1ca206e3..de73e65a4696f 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -675,6 +675,13 @@ public boolean isES87TSDBCodecEnabled() { Property.Final ); + public static final Setting USE_BINARY_DOC_VALUES = Setting.boolSetting( + "index.mapping.use_binary_doc_values", + false, + Property.IndexScope, + Property.Final + ); + /** * The {@link IndexMode "mode"} of the index. */ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java index 457c90383b5d2..795401dd3e3e3 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java @@ -925,6 +925,28 @@ public AllReader reader(LeafReaderContext context) throws IOException { } } + public static class BytesRefsFromBinaryBlockLoader extends DocValuesBlockLoader { + private final String fieldName; + + public BytesRefsFromBinaryBlockLoader(String fieldName) { + this.fieldName = fieldName; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.bytesRefs(expectedCount); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + BinaryDocValues docValues = context.reader().getBinaryDocValues(fieldName); + if (docValues == null) { + return new ConstantNullsReader(); + } + return new BytesRefsFromBinary(docValues); + } + } + abstract static class AbstractBytesRefsFromBinary extends BlockDocValuesReader { protected final BinaryDocValues docValues; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java 
b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index cf3fad86812f5..eee62b72ccac8 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -13,15 +13,18 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.InvertableType; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DocValuesSkipIndexType; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiTerms; import org.apache.lucene.index.Term; @@ -90,6 +93,7 @@ import static org.apache.lucene.index.IndexWriter.MAX_TERM_LENGTH; import static org.elasticsearch.core.Strings.format; import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING; +import static org.elasticsearch.index.IndexSettings.USE_BINARY_DOC_VALUES; import static org.elasticsearch.index.IndexSettings.USE_DOC_VALUES_SKIPPER; import static org.elasticsearch.index.mapper.FieldArrayContext.getOffsetsFieldName; import static org.elasticsearch.index.mapper.Mapper.IgnoreAbove.getIgnoreAboveDefaultValue; @@ -108,6 +112,8 @@ public static class Defaults { public static final FieldType FIELD_TYPE; public static final FieldType FIELD_TYPE_WITH_SKIP_DOC_VALUES; + public static final FieldType FIELD_TYPE_WITH_BINARY_DOC_VALUES; + static { FieldType ft = new FieldType(); 
ft.setTokenized(false); @@ -127,6 +133,15 @@ public static class Defaults { FIELD_TYPE_WITH_SKIP_DOC_VALUES = freezeAndDeduplicateFieldType(ft); } + static { + FieldType ft = new FieldType(); + ft.setTokenized(false); + ft.setOmitNorms(true); + ft.setIndexOptions(IndexOptions.NONE); + ft.setDocValuesType(DocValuesType.BINARY); + FIELD_TYPE_WITH_BINARY_DOC_VALUES = freezeAndDeduplicateFieldType(ft); + } + public static final TextSearchInfo TEXT_SEARCH_INFO = new TextSearchInfo( FIELD_TYPE, null, @@ -214,6 +229,7 @@ public static final class Builder extends FieldMapper.DimensionBuilder { private final boolean forceDocValuesSkipper; private final SourceKeepMode indexSourceKeepMode; private final boolean isWithinMultiField; + private final boolean useBinaryDocValues; public Builder(final String name, final MappingParserContext mappingParserContext) { this( @@ -227,7 +243,8 @@ public Builder(final String name, final MappingParserContext mappingParserContex USE_DOC_VALUES_SKIPPER.get(mappingParserContext.getSettings()), false, mappingParserContext.getIndexSettings().sourceKeepMode(), - mappingParserContext.isWithinMultiField() + mappingParserContext.isWithinMultiField(), + USE_BINARY_DOC_VALUES.get(mappingParserContext.getSettings()) ); } @@ -250,7 +267,8 @@ public Builder(final String name, final MappingParserContext mappingParserContex false, false, sourceKeepMode, - isWithinMultiField + isWithinMultiField, + false ); } @@ -265,7 +283,8 @@ private Builder( boolean enableDocValuesSkipper, boolean forceDocValuesSkipper, SourceKeepMode indexSourceKeepMode, - boolean isWithinMultiField + boolean isWithinMultiField, + boolean binaryDocValuesEnabled ) { super(name); this.indexAnalyzers = indexAnalyzers; @@ -301,6 +320,7 @@ private Builder( this.forceDocValuesSkipper = forceDocValuesSkipper; this.indexSourceKeepMode = indexSourceKeepMode; this.isWithinMultiField = isWithinMultiField; + this.useBinaryDocValues = binaryDocValuesEnabled; } public Builder(String name, 
IndexVersion indexCreatedVersion) { @@ -330,7 +350,8 @@ public static Builder buildWithDocValuesSkipper( enableDocValuesSkipper, true, SourceKeepMode.NONE, - isWithinMultiField + isWithinMultiField, + false ); } @@ -413,7 +434,7 @@ protected Parameter[] getParameters() { dimension }; } - private KeywordFieldType buildFieldType(MapperBuilderContext context, FieldType fieldType) { + private KeywordFieldType buildFieldType(MapperBuilderContext context, FieldType fieldType, boolean useBinaryDocValues) { NamedAnalyzer normalizer = Lucene.KEYWORD_ANALYZER; NamedAnalyzer searchAnalyzer = Lucene.KEYWORD_ANALYZER; NamedAnalyzer quoteAnalyzer = Lucene.KEYWORD_ANALYZER; @@ -448,20 +469,25 @@ private KeywordFieldType buildFieldType(MapperBuilderContext context, FieldType searchAnalyzer, quoteAnalyzer, this, - context.isSourceSynthetic() + context.isSourceSynthetic(), + useBinaryDocValues ); } @Override public KeywordFieldMapper build(MapperBuilderContext context) { + String fullName = context.buildFullName(leafName()); + // Index sorting by binary doc values not support (yet): + boolean useBinaryDocValues = fullName.equals("host.name") == false && this.useBinaryDocValues; FieldType fieldtype = resolveFieldType( + useBinaryDocValues, enableDocValuesSkipper, forceDocValuesSkipper, hasDocValues, indexCreatedVersion, indexSortConfig, indexMode, - context.buildFullName(leafName()) + fullName ); fieldtype.setOmitNorms(this.hasNorms.getValue() == false); fieldtype.setStored(this.stored.getValue()); @@ -492,15 +518,17 @@ public KeywordFieldMapper build(MapperBuilderContext context) { return new KeywordFieldMapper( leafName(), fieldtype, - buildFieldType(context, fieldtype), + buildFieldType(context, fieldtype, useBinaryDocValues), builderParams(this, context), this, offsetsFieldName, - indexSourceKeepMode + indexSourceKeepMode, + useBinaryDocValues ); } private static FieldType resolveFieldType( + final boolean useBinaryDocValues, final boolean enableDocValuesSkipper, final boolean 
forceDocValuesSkipper, final Parameter hasDocValues, @@ -509,6 +537,10 @@ private static FieldType resolveFieldType( final IndexMode indexMode, final String fullFieldName ) { + if (useBinaryDocValues) { + return new FieldType(Defaults.FIELD_TYPE_WITH_BINARY_DOC_VALUES); + } + if (enableDocValuesSkipper) { if (forceDocValuesSkipper) { assert hasDocValues.getValue(); @@ -553,6 +585,7 @@ public static final class KeywordFieldType extends TextFamilyFieldType { private final boolean isDimension; private final IndexSortConfig indexSortConfig; private final boolean hasDocValuesSkipper; + private final boolean useBinaryDocValues; public KeywordFieldType( String name, @@ -561,7 +594,8 @@ public KeywordFieldType( NamedAnalyzer searchAnalyzer, NamedAnalyzer quoteAnalyzer, Builder builder, - boolean isSyntheticSource + boolean isSyntheticSource, + boolean useBinaryDocValues ) { super( name, @@ -581,6 +615,7 @@ public KeywordFieldType( this.isDimension = builder.dimension.getValue(); this.indexSortConfig = builder.indexSortConfig; this.hasDocValuesSkipper = DocValuesSkipIndexType.NONE.equals(fieldType.docValuesSkipIndexType()) == false; + this.useBinaryDocValues = useBinaryDocValues; } public KeywordFieldType(String name) { @@ -597,6 +632,7 @@ public KeywordFieldType(String name, boolean isIndexed, boolean hasDocValues, Ma this.isDimension = false; this.indexSortConfig = null; this.hasDocValuesSkipper = false; + this.useBinaryDocValues = false; } public KeywordFieldType(String name, FieldType fieldType) { @@ -618,6 +654,7 @@ public KeywordFieldType(String name, FieldType fieldType) { this.isDimension = false; this.indexSortConfig = null; this.hasDocValuesSkipper = DocValuesSkipIndexType.NONE.equals(fieldType.docValuesSkipIndexType()) == false; + this.useBinaryDocValues = false; } public KeywordFieldType(String name, NamedAnalyzer analyzer) { @@ -639,6 +676,7 @@ public KeywordFieldType(String name, NamedAnalyzer analyzer) { this.isDimension = false; this.indexSortConfig = null; 
this.hasDocValuesSkipper = false; + this.useBinaryDocValues = false; } @Override @@ -799,6 +837,10 @@ NamedAnalyzer normalizer() { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { + if (useBinaryDocValues) { + return new BlockDocValuesReader.BytesRefsFromBinaryBlockLoader(name()); + } + if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSourceEnabled())) { return new BlockDocValuesReader.BytesRefsFromOrdsBlockLoader(name()); } @@ -1119,6 +1161,7 @@ public Query automatonQuery( private final boolean forceDocValuesSkipper; private final String offsetsFieldName; private final SourceKeepMode indexSourceKeepMode; + private final boolean useBinaryDocValues; private KeywordFieldMapper( String simpleName, @@ -1127,7 +1170,8 @@ private KeywordFieldMapper( BuilderParams builderParams, Builder builder, String offsetsFieldName, - SourceKeepMode indexSourceKeepMode + SourceKeepMode indexSourceKeepMode, + boolean useBinaryDocValues ) { super(simpleName, mappedFieldType, builderParams); assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0; @@ -1148,6 +1192,7 @@ private KeywordFieldMapper( this.forceDocValuesSkipper = builder.forceDocValuesSkipper; this.offsetsFieldName = offsetsFieldName; this.indexSourceKeepMode = indexSourceKeepMode; + this.useBinaryDocValues = useBinaryDocValues; } @Override @@ -1210,6 +1255,13 @@ private boolean indexValue(DocumentParserContext context, XContentString value) return false; } + if (useBinaryDocValues) { + var utfBytes = value.bytes(); + var binaryValue = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length()); + context.doc().add(new BinaryDocValuesField(fieldType().name(), binaryValue)); + return true; + } + // if the value's length exceeds ignore_above, then don't index it if (fieldType().ignoreAbove().isIgnored(value)) { context.addIgnoredField(fullPath()); @@ -1316,7 +1368,8 @@ public FieldMapper.Builder getMergeBuilder() { 
enableDocValuesSkipper, forceDocValuesSkipper, indexSourceKeepMode, - fieldType().isWithinMultiField() + fieldType().isWithinMultiField(), + useBinaryDocValues ).dimension(fieldType().isDimension()).init(this); } @@ -1365,22 +1418,26 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException { } }); } else if (hasDocValues) { - if (offsetsFieldName != null) { - layers.add(new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName)); + if (useBinaryDocValues) { + layers.add(new BinarySyntheticFieldLoader()); } else { - layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) { - - @Override - protected BytesRef convert(BytesRef value) { - return value; - } - - @Override - protected BytesRef preserve(BytesRef value) { - // Preserve must make a deep copy because convert gets a shallow copy from the iterator - return BytesRef.deepCopyOf(value); - } - }); + if (offsetsFieldName != null) { + layers.add(new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName)); + } else { + layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) { + + @Override + protected BytesRef convert(BytesRef value) { + return value; + } + + @Override + protected BytesRef preserve(BytesRef value) { + // Preserve must make a deep copy because convert gets a shallow copy from the iterator + return BytesRef.deepCopyOf(value); + } + }); + } } } @@ -1399,4 +1456,50 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException { return new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, layers); } + + final class BinarySyntheticFieldLoader implements CompositeSyntheticFieldLoader.DocValuesLayer { + private int docValueCount; + private BytesRef docValueBytes; + + @Override + public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { + BinaryDocValues values = leafReader.getBinaryDocValues(fullPath()); + if (values == null) 
{ + docValueCount = 0; + return null; + } + + return docId -> { + if (values.advanceExact(docId) == false) { + docValueCount = 0; + return hasValue(); + } + docValueBytes = BytesRef.deepCopyOf(values.binaryValue()); + docValueCount = 1; + return hasValue(); + }; + } + + @Override + public boolean hasValue() { + return docValueCount > 0; + } + + @Override + public long valueCount() { + return docValueCount; + } + + @Override + public void write(XContentBuilder b) throws IOException { + if (hasValue()) { + b.utf8Value(docValueBytes.bytes, docValueBytes.offset, docValueBytes.length); + } + } + + @Override + public String fieldName() { + return fullPath(); + } + } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java index 815f19ec2cfb6..ed0038b4645a5 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java @@ -322,7 +322,8 @@ public void testIgnoreAboveIndexLevelSetting() { mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then @@ -354,7 +355,8 @@ public void testIgnoreAboveIsSetReturnsTrueWhenIgnoreAboveIsGiven() { mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then @@ -385,7 +387,8 @@ public void testIgnoreAboveIsSetReturnsFalseWhenIgnoreAboveIsNotGiven() { mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then @@ -417,7 +420,8 @@ public void testIgnoreAboveIsSetReturnsFalseWhenIgnoreAboveIsGivenButItsTheSameA mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then @@ -449,7 +453,8 @@ public void testIgnoreAboveIsSetReturnsFalseWhenIgnoreAboveIsGivenButItsTheSameA mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + 
false ); // when/then @@ -481,7 +486,8 @@ public void testIgnoreAboveIsSetReturnsTrueWhenIgnoreAboveIsGivenAsLogsdbDefault mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then @@ -513,7 +519,8 @@ public void testIgnoreAboveIsSetReturnsTrueWhenIgnoreAboveIsConfiguredAtIndexLev mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java index 82f1d3a0b687c..62dfb2d90fc68 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java @@ -357,7 +357,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); TextFieldType ft = new TextFieldType( @@ -406,7 +407,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); TextFieldType ft = new TextFieldType( diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java index 4e562217a00e3..1d18d479d3265 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java @@ -1393,8 +1393,8 @@ private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) { Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER, new KeywordFieldMapper.Builder(name, 
IndexVersion.current()).docValues(false), - true // TODO randomize - load from stored keyword fields if stored even in synthetic source - ); + true, // TODO randomize - load from stored keyword fields if stored even in synthetic source + useBinaryDocValues); } @AwaitsFix(bugUrl = "Get working for multiple indices") diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java index 3a83d365f6f57..dd51c21f1707f 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java @@ -1580,8 +1580,8 @@ private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) { Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER, new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false), - true // TODO randomize - load from stored keyword fields if stored even in synthetic source - ); + true, // TODO randomize - load from stored keyword fields if stored even in synthetic source + useBinaryDocValues); } private TextFieldMapper.TextFieldType storedTextField(String name) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 29c98a073938e..3cec096140263 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -240,7 +240,8 @@ static MappedFieldType createUnmappedFieldType(String name, DefaultShardContext Lucene.KEYWORD_ANALYZER, 
Lucene.KEYWORD_ANALYZER, builder, - context.ctx.isSourceSynthetic() + context.ctx.isSourceSynthetic(), + false ); } } diff --git a/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/keyword_use_binary_doc_values.yml b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/keyword_use_binary_doc_values.yml new file mode 100644 index 0000000000000..2f4d22ba233e5 --- /dev/null +++ b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/keyword_use_binary_doc_values.yml @@ -0,0 +1,117 @@ +--- +setup: + - do: + indices.create: + index: my-index + body: + settings: + index: + mapping: + use_binary_doc_values: true + mode: logsdb + mappings: + properties: + "@timestamp": + type: date + host.name: + type: keyword + agent_id: + type: keyword + process_id: + type: integer + http_method: + type: keyword + is_https: + type: boolean + location: + type: geo_point + message: + type: text + + - do: + bulk: + index: my-index + refresh: true + body: + - { "index": { } } + - { "@timestamp": "2024-02-12T10:30:00Z", "host.name": "foo", "agent_id": "darth-vader", "process_id": 101, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "No, I am your father." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:31:00Z", "host.name": "bar", "agent_id": "yoda", "process_id": 102, "http_method": "PUT", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "Do. Or do not. There is no try." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:32:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 103, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "May the force be with you." 
} + - { "index": { } } + - { "@timestamp": "2024-02-12T10:33:00Z", "host.name": "baz", "agent_id": "darth-vader", "process_id": 102, "http_method": "POST", "is_https": true, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "I find your lack of faith disturbing." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:34:00Z", "host.name": "baz", "agent_id": "yoda", "process_id": 104, "http_method": "POST", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "Wars not make one great." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:35:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 105, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "That's no moon. It's a space station." } + +--- +teardown: + - do: + indices.delete: + index: my-index + +--- +"Simple from": + - do: + esql.query: + body: + query: 'FROM my-index | SORT host.name, @timestamp | LIMIT 1' + + - match: {columns.0.name: "@timestamp"} + - match: {columns.0.type: "date"} + - match: {columns.1.name: "agent_id"} + - match: {columns.1.type: "keyword"} + - match: {columns.2.name: "host.name"} + - match: {columns.2.type: "keyword"} + - match: {columns.3.name: "http_method" } + - match: {columns.3.type: "keyword" } + - match: {columns.4.name: "is_https"} + - match: {columns.4.type: "boolean"} + - match: {columns.5.name: "location"} + - match: {columns.5.type: "geo_point"} + - match: {columns.6.name: "message"} + - match: {columns.6.type: "text"} + - match: {columns.7.name: "process_id"} + - match: {columns.7.type: "integer"} + + - match: {values.0.0: "2024-02-12T10:31:00.000Z"} + - match: {values.0.1: "yoda"} + - match: {values.0.2: "bar"} + - match: {values.0.3: "PUT"} + - match: {values.0.4: false} + - match: {values.0.5: "POINT (-74.00600004941225 40.712799984030426)"} + - match: {values.0.6: "Do. Or do not. 
There is no try."} + - match: {values.0.7: 102} + +--- +"Simple from keyword fields": + - do: + esql.query: + body: + query: 'FROM my-index | SORT host.name, @timestamp | KEEP agent_id, http_method | LIMIT 10' + profile: true + + - match: {columns.0.name: "agent_id"} + - match: {columns.0.type: "keyword"} + - match: {columns.1.name: "http_method"} + - match: {columns.1.type: "keyword"} + + - match: {values.0.0: "yoda"} + - match: {values.0.1: "PUT"} + - match: {values.1.0: "darth-vader"} + - match: {values.1.1: "POST"} + - match: {values.2.0: "yoda"} + - match: {values.2.1: "POST"} + - match: {values.3.0: "darth-vader"} + - match: {values.3.1: "GET"} + - match: {values.4.0: "obi-wan"} + - match: {values.4.1: "GET"} + - match: {values.5.0: "obi-wan"} + - match: {values.5.1: "GET"} + + - match: {profile.drivers.0.description: "data"} + - match: {profile.drivers.0.operators.1.operator: "ValuesSourceReaderOperator[fields = [@timestamp, agent_id, host.name, http_method]]"} + - match: {profile.drivers.0.operators.1.status.readers_built.agent_id:row_stride:BlockDocValuesReader\\.Bytes: 1} + - match: {profile.drivers.0.operators.1.status.readers_built.http_method:row_stride:BlockDocValuesReader\\.Bytes: 1} From c9a3a84d40676700e9b4160a658e8fa7eaced302 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 6 Oct 2025 10:30:40 +0000 Subject: [PATCH 02/15] [CI] Auto commit changes from spotless --- .../_nightly/esql/ValuesSourceReaderBenchmark.java | 3 ++- .../index/mapper/extras/MatchOnlyTextFieldTypeTests.java | 6 ++++-- .../lucene/read/ValueSourceReaderTypeConversionTests.java | 3 ++- .../lucene/read/ValuesSourceReaderOperatorTests.java | 3 ++- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java index b9756c267e0b5..6add290ca52b5 100644 --- 
a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java @@ -223,7 +223,8 @@ private static BlockLoader blockLoader(String name) { Lucene.KEYWORD_ANALYZER, new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(ft.docValuesType() != DocValuesType.NONE), syntheticSource, - useBinaryDocValues).blockLoader(new MappedFieldType.BlockLoaderContext() { + useBinaryDocValues + ).blockLoader(new MappedFieldType.BlockLoaderContext() { @Override public String indexName() { return "benchmark"; diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java index bbbdacea062e3..41a0559453372 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java @@ -299,7 +299,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet mock(NamedAnalyzer.class), builder, true, - useBinaryDocValues); + useBinaryDocValues + ); MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType( "parent", @@ -347,7 +348,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet mock(NamedAnalyzer.class), builder, true, - useBinaryDocValues); + useBinaryDocValues + ); MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType( "parent", diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java 
b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java index 1d18d479d3265..0636b6027b57a 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java @@ -1394,7 +1394,8 @@ private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) { Lucene.KEYWORD_ANALYZER, new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false), true, // TODO randomize - load from stored keyword fields if stored even in synthetic source - useBinaryDocValues); + useBinaryDocValues + ); } @AwaitsFix(bugUrl = "Get working for multiple indices") diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java index dd51c21f1707f..2b0db9bfa484b 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java @@ -1581,7 +1581,8 @@ private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) { Lucene.KEYWORD_ANALYZER, new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false), true, // TODO randomize - load from stored keyword fields if stored even in synthetic source - useBinaryDocValues); + useBinaryDocValues + ); } private TextFieldMapper.TextFieldType storedTextField(String name) { From d2b30bd592268fd27c1a0d7e691f3f2d358addbf Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 6 Oct 2025 12:49:13 +0200 Subject: [PATCH 03/15] fix compile error --- 
.../benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java index 6add290ca52b5..cb929d4437722 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java @@ -223,7 +223,7 @@ private static BlockLoader blockLoader(String name) { Lucene.KEYWORD_ANALYZER, new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(ft.docValuesType() != DocValuesType.NONE), syntheticSource, - useBinaryDocValues + false ).blockLoader(new MappedFieldType.BlockLoaderContext() { @Override public String indexName() { From 511b0328825c2bb86987b6a24e6a99f9874ff3dd Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 6 Oct 2025 12:50:56 +0200 Subject: [PATCH 04/15] Fix clickbench queries with wildcard queries to work with binary doc values. 
--- .../index/mapper/KeywordFieldMapper.java | 13 ++++- .../BinaryDocValuesStringFieldScript.java | 56 +++++++++++++++++++ 2 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/script/BinaryDocValuesStringFieldScript.java diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index eee62b72ccac8..3a65ea0fd3312 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -53,12 +53,16 @@ import org.elasticsearch.index.fielddata.FieldData; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.IndexFieldDataCache; import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData; import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData; +import org.elasticsearch.index.fielddata.plain.BinaryIndexFieldData; import org.elasticsearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData; import org.elasticsearch.index.query.AutomatonQueryWithDescription; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.similarity.SimilarityProvider; +import org.elasticsearch.indices.breaker.CircuitBreakerService; +import org.elasticsearch.script.BinaryDocValuesStringFieldScript; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptCompiler; import org.elasticsearch.script.SortedSetDocValuesStringFieldScript; @@ -956,7 +960,10 @@ protected BytesRef storedToBytesRef(Object stored) { ); } - private SortedSetOrdinalsIndexFieldData.Builder fieldDataFromDocValues() { + private IndexFieldData.Builder fieldDataFromDocValues() { + if (useBinaryDocValues) { + return new BinaryIndexFieldData.Builder(name(), 
CoreValuesSourceType.KEYWORD); + } return new SortedSetOrdinalsIndexFieldData.Builder( name(), CoreValuesSourceType.KEYWORD, @@ -1041,7 +1048,9 @@ public Query wildcardQuery( } return new StringScriptFieldWildcardQuery( new Script(""), - ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx), + ctx -> useBinaryDocValues + ? new BinaryDocValuesStringFieldScript(name(), context.lookup(), ctx) + : new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx), name(), value, caseInsensitive diff --git a/server/src/main/java/org/elasticsearch/script/BinaryDocValuesStringFieldScript.java b/server/src/main/java/org/elasticsearch/script/BinaryDocValuesStringFieldScript.java new file mode 100644 index 0000000000000..be5aba49ec820 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/script/BinaryDocValuesStringFieldScript.java @@ -0,0 +1,56 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.script; + +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.index.mapper.OnScriptError; +import org.elasticsearch.search.lookup.SearchLookup; + +import java.io.IOException; +import java.util.Map; + +public class BinaryDocValuesStringFieldScript extends StringFieldScript { + private final BinaryDocValues binaryDocValues; + + boolean hasValue = false; + + public BinaryDocValuesStringFieldScript(String fieldName, SearchLookup searchLookup, LeafReaderContext ctx) { + super(fieldName, Map.of(), searchLookup, OnScriptError.FAIL, ctx); + try { + binaryDocValues = DocValues.getBinary(ctx.reader(), fieldName); + } catch (IOException e) { + throw new IllegalStateException("Cannot load doc values", e); + } + } + + @Override + public void setDocument(int docID) { + try { + hasValue = binaryDocValues.advanceExact(docID); + } catch (IOException e) { + throw new IllegalStateException("Cannot load doc values", e); + } + } + + @Override + public void execute() { + try { + if (hasValue) { + BytesRef bytesRef = binaryDocValues.binaryValue(); + emit(bytesRef.utf8ToString()); + } + } catch (IOException e) { + throw new IllegalStateException("Cannot load doc values", e); + } + } +} From adfca29fb5741a399096c6781475e2b85a7d20c6 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 6 Oct 2025 12:56:50 +0200 Subject: [PATCH 05/15] bulk load dense binary doc values. 
--- .../es819/ES819TSDBDocValuesProducer.java | 44 ++++++++++++++++++- .../index/mapper/BlockDocValuesReader.java | 11 +++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 5d90f2814853d..e26f8d04965ae 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -208,6 +208,26 @@ public BytesRef binaryValue() throws IOException { bytesSlice.readBytes((long) doc * length, bytes.bytes, 0, length); return bytes; } + + @Override + public BlockLoader.Block tryRead( + BlockLoader.BlockFactory factory, + BlockLoader.Docs docs, + int offset, + boolean nullsFiltered, + BlockDocValuesReader.ToDouble toDouble, + boolean toInt + ) throws IOException { + int count = docs.count() - offset; + try (var builder = factory.bytesRefs(count)) { + for (int i = offset; i < docs.count(); i++) { + doc = docs.get(i); + bytesSlice.readBytes((long) doc * length, bytes.bytes, 0, length); + builder.appendBytesRef(bytes); + } + return builder.build(); + } + } }; } else { // variable length @@ -223,6 +243,28 @@ public BytesRef binaryValue() throws IOException { bytesSlice.readBytes(startOffset, bytes.bytes, 0, bytes.length); return bytes; } + + @Override + public BlockLoader.Block tryRead( + BlockLoader.BlockFactory factory, + BlockLoader.Docs docs, + int offset, + boolean nullsFiltered, + BlockDocValuesReader.ToDouble toDouble, + boolean toInt + ) throws IOException { + int count = docs.count() - offset; + try (var builder = factory.bytesRefs(count)) { + for (int i = offset; i < docs.count(); i++) { + doc = docs.get(i); + long startOffset = addresses.get(doc); + bytes.length = (int) (addresses.get(doc + 1L) - startOffset); + 
bytesSlice.readBytes(startOffset, bytes.bytes, 0, bytes.length); + builder.appendBytesRef(bytes); + } + return builder.build(); + } + } }; } } else { @@ -267,7 +309,7 @@ public BytesRef binaryValue() throws IOException { } } - private abstract static class DenseBinaryDocValues extends BinaryDocValues { + private abstract static class DenseBinaryDocValues extends BinaryDocValues implements BlockLoader.OptionalColumnAtATimeReader { final int maxDoc; int doc = -1; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java index 795401dd3e3e3..de35a85e8878c 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java @@ -1032,6 +1032,17 @@ public BytesRefsFromBinary(BinaryDocValues docValues) { super(docValues); } + @Override + public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset, boolean nullsFiltered) throws IOException { + if (docValues instanceof BlockLoader.OptionalColumnAtATimeReader direct) { + BlockLoader.Block block = direct.tryRead(factory, docs, offset, nullsFiltered, null, false); + if (block != null) { + return block; + } + } + return super.read(factory, docs, offset, nullsFiltered); + } + @Override void read(int doc, BytesRefBuilder builder) throws IOException { if (false == docValues.advanceExact(doc)) { From 32c1d7b633cbf751ce77676e56ac5a55a3eb13f8 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 6 Oct 2025 13:38:27 +0200 Subject: [PATCH 06/15] test compile errors --- .../index/mapper/extras/MatchOnlyTextFieldTypeTests.java | 4 ++-- .../compute/lucene/read/ValuesSourceReaderOperatorTests.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java 
b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java index 41a0559453372..0042810241be7 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java @@ -299,7 +299,7 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet mock(NamedAnalyzer.class), builder, true, - useBinaryDocValues + false ); MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType( @@ -348,7 +348,7 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet mock(NamedAnalyzer.class), builder, true, - useBinaryDocValues + false ); MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType( diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java index 2b0db9bfa484b..8c0e57e942d05 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java @@ -1581,7 +1581,7 @@ private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) { Lucene.KEYWORD_ANALYZER, new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false), true, // TODO randomize - load from stored keyword fields if stored even in synthetic source - useBinaryDocValues + false ); } From 5985192241f9d4259fef8c38b1245d33cd52ca4b Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 6 Oct 2025 14:57:10 +0200 Subject: [PATCH 07/15] test compile errors --- 
.../lucene/read/ValueSourceReaderTypeConversionTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java index 0636b6027b57a..79b14aa2c65ea 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java @@ -1394,7 +1394,7 @@ private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) { Lucene.KEYWORD_ANALYZER, new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false), true, // TODO randomize - load from stored keyword fields if stored even in synthetic source - useBinaryDocValues + false ); } From e1b7d5057f19c7e1160cd87bac33e50cf74bb6cc Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 6 Oct 2025 13:04:48 +0000 Subject: [PATCH 08/15] [CI] Auto commit changes from spotless --- .../java/org/elasticsearch/index/mapper/KeywordFieldMapper.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 3a65ea0fd3312..1ffa4a67bd248 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -53,7 +53,6 @@ import org.elasticsearch.index.fielddata.FieldData; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; -import org.elasticsearch.index.fielddata.IndexFieldDataCache; import 
org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData; import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData; import org.elasticsearch.index.fielddata.plain.BinaryIndexFieldData; @@ -61,7 +60,6 @@ import org.elasticsearch.index.query.AutomatonQueryWithDescription; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.similarity.SimilarityProvider; -import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.script.BinaryDocValuesStringFieldScript; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptCompiler; From e45360f1db3b15371377a7ed64f4d548125da2be Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Mon, 6 Oct 2025 14:24:46 -0500 Subject: [PATCH 09/15] Copy uncompressed addBinaryDocValues --- .../codec/tsdb/BinaryDVCompressionMode.java | 29 ++ .../es819/ES819TSDBDocValuesConsumer.java | 304 ++++++++++++++++++ .../tsdb/es819/ES819TSDBDocValuesFormat.java | 11 +- 3 files changed, 341 insertions(+), 3 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java new file mode 100644 index 0000000000000..ddbd6d493c3b1 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java @@ -0,0 +1,29 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.index.codec.tsdb; + +public enum BinaryDVCompressionMode { + + NO_COMPRESS((byte) 0), + COMPRESSED_WITH_LZ4((byte) 1); + + public final byte code; + + BinaryDVCompressionMode(byte code) { + this.code = code; + } + + static BinaryDVCompressionMode fromMode(byte mode) { + return switch (mode) { + case 0 -> NO_COMPRESS; + case 1 -> COMPRESSED_WITH_LZ4; + default -> throw new IllegalStateException("unknown compression mode [" + mode + "]"); + }; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java index 968e50eaf32be..51c6056ac70f4 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java @@ -9,10 +9,12 @@ package org.elasticsearch.index.codec.tsdb.es819; +import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.lucene90.IndexedDISI; import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValuesSkipIndexType; import org.apache.lucene.index.FieldInfo; @@ -29,6 +31,7 @@ import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.store.ByteBuffersDataOutput; import org.apache.lucene.store.ByteBuffersIndexOutput; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexOutput; @@ -41,8 +44,10 @@ import org.apache.lucene.util.packed.DirectMonotonicWriter; import org.apache.lucene.util.packed.PackedInts; import org.elasticsearch.core.IOUtils; +import 
org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode; import org.elasticsearch.index.codec.tsdb.TSDBDocValuesEncoder; +import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -65,9 +70,12 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer { private final int minDocsPerOrdinalForOrdinalRangeEncoding; final boolean enableOptimizedMerge; private final int primarySortFieldNumber; + final SegmentWriteState state; + final BinaryDVCompressionMode binaryDVCompressionMode; ES819TSDBDocValuesConsumer( SegmentWriteState state, + BinaryDVCompressionMode binaryDVCompressionMode, int skipIndexIntervalSize, int minDocsPerOrdinalForOrdinalRangeEncoding, boolean enableOptimizedMerge, @@ -76,6 +84,8 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer { String metaCodec, String metaExtension ) throws IOException { + this.state = state; + this.binaryDVCompressionMode = binaryDVCompressionMode; this.termsDictBuffer = new byte[1 << 14]; this.dir = state.directory; this.minDocsPerOrdinalForOrdinalRangeEncoding = minDocsPerOrdinalForOrdinalRangeEncoding; @@ -315,7 +325,143 @@ public void mergeBinaryField(FieldInfo mergeFieldInfo, MergeState mergeState) th public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { meta.writeInt(field.number); meta.writeByte(ES819TSDBDocValuesFormat.BINARY); + meta.writeByte(binaryDVCompressionMode.code); + switch (binaryDVCompressionMode) { + case NO_COMPRESS -> doAddUncompressedBinary(field, valuesProducer); + case COMPRESSED_WITH_LZ4 -> doAddCompressedBinaryLZ4(field, valuesProducer); + } + } + + public void doAddUncompressedBinary(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { + if (valuesProducer instanceof TsdbDocValuesProducer tsdbValuesProducer && tsdbValuesProducer.mergeStats.supported()) { + final int numDocsWithField = tsdbValuesProducer.mergeStats.sumNumDocsWithField(); + final int 
minLength = tsdbValuesProducer.mergeStats.minLength(); + final int maxLength = tsdbValuesProducer.mergeStats.maxLength(); + + assert numDocsWithField <= maxDoc; + + BinaryDocValues values = valuesProducer.getBinary(field); + long start = data.getFilePointer(); + meta.writeLong(start); // dataOffset + + OffsetsAccumulator offsetsAccumulator = null; + DISIAccumulator disiAccumulator = null; + try { + if (numDocsWithField > 0 && numDocsWithField < maxDoc) { + disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); + } + + assert maxLength >= minLength; + if (maxLength > minLength) { + offsetsAccumulator = new OffsetsAccumulator(dir, context, data, numDocsWithField); + } + + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + BytesRef v = values.binaryValue(); + data.writeBytes(v.bytes, v.offset, v.length); + if (disiAccumulator != null) { + disiAccumulator.addDocId(doc); + } + if (offsetsAccumulator != null) { + offsetsAccumulator.addDoc(v.length); + } + } + meta.writeLong(data.getFilePointer() - start); // dataLength + + if (numDocsWithField == 0) { + meta.writeLong(-2); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } else if (numDocsWithField == maxDoc) { + meta.writeLong(-1); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } else { + long offset = data.getFilePointer(); + meta.writeLong(offset); // docsWithFieldOffset + final short jumpTableEntryCount = disiAccumulator.build(data); + meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength + meta.writeShort(jumpTableEntryCount); + meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); + } + + meta.writeInt(numDocsWithField); + meta.writeInt(minLength); + 
meta.writeInt(maxLength); + if (offsetsAccumulator != null) { + offsetsAccumulator.build(meta, data); + } + } finally { + IOUtils.close(disiAccumulator, offsetsAccumulator); + } + } else { + BinaryDocValues values = valuesProducer.getBinary(field); + long start = data.getFilePointer(); + meta.writeLong(start); // dataOffset + int numDocsWithField = 0; + int minLength = Integer.MAX_VALUE; + int maxLength = 0; + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + numDocsWithField++; + BytesRef v = values.binaryValue(); + int length = v.length; + data.writeBytes(v.bytes, v.offset, v.length); + minLength = Math.min(length, minLength); + maxLength = Math.max(length, maxLength); + } + assert numDocsWithField <= maxDoc; + meta.writeLong(data.getFilePointer() - start); // dataLength + + if (numDocsWithField == 0) { + meta.writeLong(-2); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } else if (numDocsWithField == maxDoc) { + meta.writeLong(-1); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } else { + long offset = data.getFilePointer(); + meta.writeLong(offset); // docsWithFieldOffset + values = valuesProducer.getBinary(field); + final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); + meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength + meta.writeShort(jumpTableEntryCount); + meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); + } + + meta.writeInt(numDocsWithField); + meta.writeInt(minLength); + meta.writeInt(maxLength); + if (maxLength > minLength) { + start = data.getFilePointer(); + meta.writeLong(start); + meta.writeVInt(ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT); + + final 
DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance( + meta, + data, + numDocsWithField + 1, + ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT + ); + long addr = 0; + writer.add(addr); + values = valuesProducer.getBinary(field); + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + addr += values.binaryValue().length; + writer.add(addr); + } + writer.finish(); + meta.writeLong(data.getFilePointer() - start); + } + } + } + public void doAddCompressedBinaryLZ4(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { if (valuesProducer instanceof TsdbDocValuesProducer tsdbValuesProducer && tsdbValuesProducer.mergeStats.supported()) { final int numDocsWithField = tsdbValuesProducer.mergeStats.sumNumDocsWithField(); final int minLength = tsdbValuesProducer.mergeStats.minLength(); @@ -444,6 +590,164 @@ public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) th } } + static final int BINARY_BLOCK_SHIFT = 5; + static final int BINARY_DOCS_PER_COMPRESSED_BLOCK = 1 << BINARY_BLOCK_SHIFT; + + private class CompressedBinaryBlockWriter implements Closeable { + final LZ4.FastCompressionHashTable ht = new LZ4.FastCompressionHashTable(); + int uncompressedBlockLength = 0; + int maxUncompressedBlockLength = 0; + int numDocsInCurrentBlock = 0; + final int[] docLengths = new int[BINARY_DOCS_PER_COMPRESSED_BLOCK]; + byte[] block = BytesRef.EMPTY_BYTES; + int totalChunks = 0; + long maxPointer = 0; + final long blockAddressesStart; + + final IndexOutput tempBinaryOffsets; + + CompressedBinaryBlockWriter() throws IOException { + tempBinaryOffsets = EndiannessReverserUtil.createTempOutput( + state.directory, + state.segmentInfo.name, + "binary_pointers", + state.context + ); + boolean success = false; + try { + CodecUtil.writeHeader( + tempBinaryOffsets, + ES819TSDBDocValuesFormat.META_CODEC + "FilePointers", + ES819TSDBDocValuesFormat.VERSION_CURRENT + ); + blockAddressesStart = 
data.getFilePointer(); + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(this); // self-close because constructor caller can't + } + } + } + + void addDoc(int doc, BytesRef v) throws IOException { + docLengths[numDocsInCurrentBlock] = v.length; + block = ArrayUtil.grow(block, uncompressedBlockLength + v.length); + System.arraycopy(v.bytes, v.offset, block, uncompressedBlockLength, v.length); + uncompressedBlockLength += v.length; + numDocsInCurrentBlock++; + if (numDocsInCurrentBlock == BINARY_DOCS_PER_COMPRESSED_BLOCK) { + flushData(); + } + } + + private void flushData() throws IOException { + if (numDocsInCurrentBlock > 0) { + // Write offset to this block to temporary offsets file + totalChunks++; + long thisBlockStartPointer = data.getFilePointer(); + + // Optimisation - check if all lengths are same + boolean allLengthsSame = true; + for (int i = 1; i < BINARY_DOCS_PER_COMPRESSED_BLOCK; i++) { + if (docLengths[i] != docLengths[i - 1]) { + allLengthsSame = false; + break; + } + } + if (allLengthsSame) { + // Only write one value shifted. 
Steal a bit to indicate all other lengths are the same + int onlyOneLength = (docLengths[0] << 1) | 1; + data.writeVInt(onlyOneLength); + } else { + for (int i = 0; i < BINARY_DOCS_PER_COMPRESSED_BLOCK; i++) { + if (i == 0) { + // Write first value shifted and steal a bit to indicate other lengths are to follow + int multipleLengths = (docLengths[0] << 1); + data.writeVInt(multipleLengths); + } else { + data.writeVInt(docLengths[i]); + } + } + } + maxUncompressedBlockLength = Math.max(maxUncompressedBlockLength, uncompressedBlockLength); + LZ4.compress(block, 0, uncompressedBlockLength, EndiannessReverserUtil.wrapDataOutput(data), ht); + numDocsInCurrentBlock = 0; + // Ensure initialized with zeroes because full array is always written + Arrays.fill(docLengths, 0); + uncompressedBlockLength = 0; + maxPointer = data.getFilePointer(); + tempBinaryOffsets.writeVLong(maxPointer - thisBlockStartPointer); + } + } + + void writeMetaData() throws IOException { + if (totalChunks == 0) { + return; + } + + long startDMW = data.getFilePointer(); + meta.writeLong(startDMW); + + meta.writeVInt(totalChunks); + meta.writeVInt(BINARY_BLOCK_SHIFT); + meta.writeVInt(maxUncompressedBlockLength); + meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT); + + CodecUtil.writeFooter(tempBinaryOffsets); + IOUtils.close(tempBinaryOffsets); + // write the compressed block offsets info to the meta file by reading from temp file + try ( + ChecksumIndexInput filePointersIn = EndiannessReverserUtil.openChecksumInput( + state.directory, + tempBinaryOffsets.getName(), + IOContext.READONCE + ) + ) { + CodecUtil.checkHeader( + filePointersIn, + ES819TSDBDocValuesFormat.META_CODEC + "FilePointers", + ES819TSDBDocValuesFormat.VERSION_CURRENT, + ES819TSDBDocValuesFormat.VERSION_CURRENT + ); + Throwable priorE = null; + try { + final DirectMonotonicWriter filePointers = DirectMonotonicWriter.getInstance( + meta, + data, + totalChunks, + ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT + ); + long fp = 
blockAddressesStart; + for (int i = 0; i < totalChunks; ++i) { + filePointers.add(fp); + fp += filePointersIn.readVLong(); + } + if (maxPointer < fp) { + throw new CorruptIndexException( + "File pointers don't add up (" + fp + " vs expected " + maxPointer + ")", + filePointersIn + ); + } + filePointers.finish(); + } catch (Throwable e) { + priorE = e; + } finally { + CodecUtil.checkFooter(filePointersIn, priorE); + } + } + // Write the length of the DMW block in the data + meta.writeLong(data.getFilePointer() - startDMW); + } + + @Override + public void close() throws IOException { + if (tempBinaryOffsets != null) { + IOUtils.close(tempBinaryOffsets, () -> state.directory.deleteFile(tempBinaryOffsets.getName())); + } + } + } + // END: Copied fom LUCENE-9211 + @Override public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { meta.writeInt(field.number); diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index fbdef488b8318..63029f382caf3 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -14,6 +14,7 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode; import java.io.IOException; @@ -47,7 +48,8 @@ public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValues static final byte SORTED_NUMERIC = 4; static final int VERSION_START = 0; - static final int VERSION_CURRENT = VERSION_START; + static final int VERSION_BINARY_DV_COMPRESSION = 1; + static final int VERSION_CURRENT = VERSION_BINARY_DV_COMPRESSION; static final int TERMS_DICT_BLOCK_LZ4_SHIFT = 
6; static final int TERMS_DICT_BLOCK_LZ4_SIZE = 1 << TERMS_DICT_BLOCK_LZ4_SHIFT; @@ -119,15 +121,17 @@ private static boolean getOptimizedMergeEnabledDefault() { final int skipIndexIntervalSize; final int minDocsPerOrdinalForRangeEncoding; private final boolean enableOptimizedMerge; + final BinaryDVCompressionMode binaryDVCompressionMode; /** Default constructor. */ public ES819TSDBDocValuesFormat() { - this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT); + this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT, BinaryDVCompressionMode.COMPRESSED_WITH_LZ4); } /** Doc values fields format with specified skipIndexIntervalSize. */ - public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForRangeEncoding, boolean enableOptimizedMerge) { + public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForRangeEncoding, boolean enableOptimizedMerge, BinaryDVCompressionMode binaryDVCompressionMode) { super(CODEC_NAME); + this.binaryDVCompressionMode = binaryDVCompressionMode; if (skipIndexIntervalSize < 2) { throw new IllegalArgumentException("skipIndexIntervalSize must be > 1, got [" + skipIndexIntervalSize + "]"); } @@ -140,6 +144,7 @@ public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinal public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { return new ES819TSDBDocValuesConsumer( state, + binaryDVCompressionMode, skipIndexIntervalSize, minDocsPerOrdinalForRangeEncoding, enableOptimizedMerge, From 392f71727b7a098da3277647276ab7db0e3f36f4 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Mon, 6 Oct 2025 14:27:50 -0500 Subject: [PATCH 10/15] Copy lz4 consumer code for non-optimized case --- .../es819/ES819TSDBDocValuesConsumer.java | 99 ++++++++----------- 1 file changed, 42 insertions(+), 57 deletions(-) diff --git 
a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java index 51c6056ac70f4..1b71a558ae775 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java @@ -526,66 +526,51 @@ public void doAddCompressedBinaryLZ4(FieldInfo field, DocValuesProducer valuesPr IOUtils.close(disiAccumulator, offsetsAccumulator); } } else { - BinaryDocValues values = valuesProducer.getBinary(field); - long start = data.getFilePointer(); - meta.writeLong(start); // dataOffset - int numDocsWithField = 0; - int minLength = Integer.MAX_VALUE; - int maxLength = 0; - for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { - numDocsWithField++; - BytesRef v = values.binaryValue(); - int length = v.length; - data.writeBytes(v.bytes, v.offset, v.length); - minLength = Math.min(length, minLength); - maxLength = Math.max(length, maxLength); - } - assert numDocsWithField <= maxDoc; - meta.writeLong(data.getFilePointer() - start); // dataLength - - if (numDocsWithField == 0) { - meta.writeLong(-2); // docsWithFieldOffset - meta.writeLong(0L); // docsWithFieldLength - meta.writeShort((short) -1); // jumpTableEntryCount - meta.writeByte((byte) -1); // denseRankPower - } else if (numDocsWithField == maxDoc) { - meta.writeLong(-1); // docsWithFieldOffset - meta.writeLong(0L); // docsWithFieldLength - meta.writeShort((short) -1); // jumpTableEntryCount - meta.writeByte((byte) -1); // denseRankPower - } else { - long offset = data.getFilePointer(); - meta.writeLong(offset); // docsWithFieldOffset - values = valuesProducer.getBinary(field); - final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); - meta.writeLong(data.getFilePointer() - 
offset); // docsWithFieldLength - meta.writeShort(jumpTableEntryCount); - meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); - } + try (CompressedBinaryBlockWriter blockWriter = new CompressedBinaryBlockWriter()) { + BinaryDocValues values = valuesProducer.getBinary(field); + long start = data.getFilePointer(); + meta.writeLong(start); // dataOffset + int numDocsWithField = 0; + int minLength = Integer.MAX_VALUE; + int maxLength = 0; + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + numDocsWithField++; + BytesRef v = values.binaryValue(); + blockWriter.addDoc(doc, v); + int length = v.length; + minLength = Math.min(length, minLength); + maxLength = Math.max(length, maxLength); + } + blockWriter.flushData(); - meta.writeInt(numDocsWithField); - meta.writeInt(minLength); - meta.writeInt(maxLength); - if (maxLength > minLength) { - start = data.getFilePointer(); - meta.writeLong(start); - meta.writeVInt(ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT); + assert numDocsWithField <= maxDoc; + meta.writeLong(data.getFilePointer() - start); // dataLength - final DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance( - meta, - data, - numDocsWithField + 1, - ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT - ); - long addr = 0; - writer.add(addr); - values = valuesProducer.getBinary(field); - for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { - addr += values.binaryValue().length; - writer.add(addr); + if (numDocsWithField == 0) { + meta.writeLong(-2); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } else if (numDocsWithField == maxDoc) { + meta.writeLong(-1); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } 
else { + long offset = data.getFilePointer(); + meta.writeLong(offset); // docsWithFieldOffset + values = valuesProducer.getBinary(field); + final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); + meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength + meta.writeShort(jumpTableEntryCount); + meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); } - writer.finish(); - meta.writeLong(data.getFilePointer() - start); + + meta.writeInt(numDocsWithField); + meta.writeInt(minLength); + meta.writeInt(maxLength); + + blockWriter.writeMetaData(); } } } From d3760fdf5ef310d88931c9b3a7ab5d29b19e5de3 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Mon, 6 Oct 2025 14:45:35 -0500 Subject: [PATCH 11/15] Copy over remainder of compression code, dont use optimized binary doc values --- .../codec/tsdb/BinaryDVCompressionMode.java | 2 +- .../es819/ES819TSDBDocValuesConsumer.java | 139 ++++----------- .../es819/ES819TSDBDocValuesProducer.java | 162 +++++++++++++++++- .../codec/tsdb/DocValuesCodecDuelTests.java | 8 +- .../es819/ES819TSDBDocValuesFormatTests.java | 5 +- 5 files changed, 206 insertions(+), 110 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java index ddbd6d493c3b1..ce0f365eb529e 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java @@ -19,7 +19,7 @@ public enum BinaryDVCompressionMode { this.code = code; } - static BinaryDVCompressionMode fromMode(byte mode) { + public static BinaryDVCompressionMode fromMode(byte mode) { return switch (mode) { case 0 -> NO_COMPRESS; case 1 -> COMPRESSED_WITH_LZ4; diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java 
b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java index 1b71a558ae775..fce05c94a4d3e 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java @@ -462,116 +462,51 @@ public void doAddUncompressedBinary(FieldInfo field, DocValuesProducer valuesPro } public void doAddCompressedBinaryLZ4(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { - if (valuesProducer instanceof TsdbDocValuesProducer tsdbValuesProducer && tsdbValuesProducer.mergeStats.supported()) { - final int numDocsWithField = tsdbValuesProducer.mergeStats.sumNumDocsWithField(); - final int minLength = tsdbValuesProducer.mergeStats.minLength(); - final int maxLength = tsdbValuesProducer.mergeStats.maxLength(); - - assert numDocsWithField <= maxDoc; - + try (CompressedBinaryBlockWriter blockWriter = new CompressedBinaryBlockWriter()) { BinaryDocValues values = valuesProducer.getBinary(field); long start = data.getFilePointer(); meta.writeLong(start); // dataOffset - - OffsetsAccumulator offsetsAccumulator = null; - DISIAccumulator disiAccumulator = null; - try { - if (numDocsWithField > 0 && numDocsWithField < maxDoc) { - disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); - } - - assert maxLength >= minLength; - if (maxLength > minLength) { - offsetsAccumulator = new OffsetsAccumulator(dir, context, data, numDocsWithField); - } - - for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { - BytesRef v = values.binaryValue(); - data.writeBytes(v.bytes, v.offset, v.length); - if (disiAccumulator != null) { - disiAccumulator.addDocId(doc); - } - if (offsetsAccumulator != null) { - offsetsAccumulator.addDoc(v.length); - } - } - meta.writeLong(data.getFilePointer() - start); // dataLength - - if (numDocsWithField == 0) 
{ - meta.writeLong(-2); // docsWithFieldOffset - meta.writeLong(0L); // docsWithFieldLength - meta.writeShort((short) -1); // jumpTableEntryCount - meta.writeByte((byte) -1); // denseRankPower - } else if (numDocsWithField == maxDoc) { - meta.writeLong(-1); // docsWithFieldOffset - meta.writeLong(0L); // docsWithFieldLength - meta.writeShort((short) -1); // jumpTableEntryCount - meta.writeByte((byte) -1); // denseRankPower - } else { - long offset = data.getFilePointer(); - meta.writeLong(offset); // docsWithFieldOffset - final short jumpTableEntryCount = disiAccumulator.build(data); - meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength - meta.writeShort(jumpTableEntryCount); - meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); - } - - meta.writeInt(numDocsWithField); - meta.writeInt(minLength); - meta.writeInt(maxLength); - if (offsetsAccumulator != null) { - offsetsAccumulator.build(meta, data); - } - } finally { - IOUtils.close(disiAccumulator, offsetsAccumulator); + int numDocsWithField = 0; + int minLength = Integer.MAX_VALUE; + int maxLength = 0; + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + numDocsWithField++; + BytesRef v = values.binaryValue(); + blockWriter.addDoc(doc, v); + int length = v.length; + minLength = Math.min(length, minLength); + maxLength = Math.max(length, maxLength); } - } else { - try (CompressedBinaryBlockWriter blockWriter = new CompressedBinaryBlockWriter()) { - BinaryDocValues values = valuesProducer.getBinary(field); - long start = data.getFilePointer(); - meta.writeLong(start); // dataOffset - int numDocsWithField = 0; - int minLength = Integer.MAX_VALUE; - int maxLength = 0; - for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { - numDocsWithField++; - BytesRef v = values.binaryValue(); - blockWriter.addDoc(doc, v); - int length = v.length; - minLength = Math.min(length, minLength); - maxLength = Math.max(length, 
maxLength); - } - blockWriter.flushData(); + blockWriter.flushData(); - assert numDocsWithField <= maxDoc; - meta.writeLong(data.getFilePointer() - start); // dataLength + assert numDocsWithField <= maxDoc; + meta.writeLong(data.getFilePointer() - start); // dataLength - if (numDocsWithField == 0) { - meta.writeLong(-2); // docsWithFieldOffset - meta.writeLong(0L); // docsWithFieldLength - meta.writeShort((short) -1); // jumpTableEntryCount - meta.writeByte((byte) -1); // denseRankPower - } else if (numDocsWithField == maxDoc) { - meta.writeLong(-1); // docsWithFieldOffset - meta.writeLong(0L); // docsWithFieldLength - meta.writeShort((short) -1); // jumpTableEntryCount - meta.writeByte((byte) -1); // denseRankPower - } else { - long offset = data.getFilePointer(); - meta.writeLong(offset); // docsWithFieldOffset - values = valuesProducer.getBinary(field); - final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); - meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength - meta.writeShort(jumpTableEntryCount); - meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); - } + if (numDocsWithField == 0) { + meta.writeLong(-2); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } else if (numDocsWithField == maxDoc) { + meta.writeLong(-1); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } else { + long offset = data.getFilePointer(); + meta.writeLong(offset); // docsWithFieldOffset + values = valuesProducer.getBinary(field); + final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); + meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength + meta.writeShort(jumpTableEntryCount); + 
meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); + } - meta.writeInt(numDocsWithField); - meta.writeInt(minLength); - meta.writeInt(maxLength); + meta.writeInt(numDocsWithField); + meta.writeInt(minLength); + meta.writeInt(maxLength); - blockWriter.writeMetaData(); - } + blockWriter.writeMetaData(); } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index e26f8d04965ae..f20dc498d346f 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -45,12 +45,15 @@ import org.apache.lucene.util.packed.PackedInts; import org.elasticsearch.core.Assertions; import org.elasticsearch.core.IOUtils; +import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode; import org.elasticsearch.index.codec.tsdb.TSDBDocValuesEncoder; import org.elasticsearch.index.mapper.BlockDocValuesReader; import org.elasticsearch.index.mapper.BlockLoader; import java.io.IOException; +import static org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode.COMPRESSED_WITH_LZ4; +import static org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode.NO_COMPRESS; import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_JUMP_LENGTH_PER_LEVEL; import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_MAX_LEVEL; import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT; @@ -97,7 +100,7 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { state.segmentSuffix ); - readFields(in, state.fieldInfos); + readFields(in, state.fieldInfos, version); } catch (Throwable exception) { priorE = exception; @@ -193,6 +196,13 @@ public BinaryDocValues getBinary(FieldInfo field) 
throws IOException { return DocValues.emptyBinary(); } + return switch (entry.compression) { + case NO_COMPRESS -> getUncompressedBinary(entry); + case COMPRESSED_WITH_LZ4 -> getCompressedBinary(entry); + }; + } + + public BinaryDocValues getUncompressedBinary(BinaryEntry entry) throws IOException { final RandomAccessInput bytesSlice = data.randomAccessSlice(entry.dataOffset, entry.dataLength); if (entry.docsWithFieldOffset == -1) { @@ -309,6 +319,132 @@ public BytesRef binaryValue() throws IOException { } } + // START: Copied fom LUCENE-9211 + private BinaryDocValues getCompressedBinary(BinaryEntry entry) throws IOException { + if (entry.docsWithFieldOffset == -1) { + // dense + final RandomAccessInput addressesData = this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength); + final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData); + return new DenseBinaryDocValues(maxDoc) { + final BinaryDecoder decoder = new BinaryDecoder( + addresses, + data.clone(), + entry.maxUncompressedChunkSize, + entry.docsPerChunkShift + ); + + @Override + public BytesRef binaryValue() throws IOException { + return decoder.decode(doc); + } + }; + } else { + // sparse + final IndexedDISI disi = new IndexedDISI( + data, + entry.docsWithFieldOffset, + entry.docsWithFieldLength, + entry.jumpTableEntryCount, + entry.denseRankPower, + entry.numDocsWithField + ); + final RandomAccessInput addressesData = this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength); + final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData); + return new SparseBinaryDocValues(disi) { + final BinaryDecoder decoder = new BinaryDecoder( + addresses, + data.clone(), + entry.maxUncompressedChunkSize, + entry.docsPerChunkShift + ); + + @Override + public BytesRef binaryValue() throws IOException { + return decoder.decode(disi.index()); + } + }; + } + + } + + // Decompresses blocks of binary values to 
retrieve content + static final class BinaryDecoder { + + private final LongValues addresses; + private final IndexInput compressedData; + // Cache of last uncompressed block + private long lastBlockId = -1; + private final int[] uncompressedDocStarts; + private final byte[] uncompressedBlock; + private final BytesRef uncompressedBytesRef; + private final int docsPerChunk; + private final int docsPerChunkShift; + + BinaryDecoder(LongValues addresses, IndexInput compressedData, int biggestUncompressedBlockSize, int docsPerChunkShift) { + super(); + this.addresses = addresses; + this.compressedData = compressedData; + // pre-allocate a byte array large enough for the biggest uncompressed block needed. + this.uncompressedBlock = new byte[biggestUncompressedBlockSize]; + uncompressedBytesRef = new BytesRef(uncompressedBlock); + this.docsPerChunk = 1 << docsPerChunkShift; + this.docsPerChunkShift = docsPerChunkShift; + uncompressedDocStarts = new int[docsPerChunk + 1]; + } + + BytesRef decode(int docNumber) throws IOException { + int blockId = docNumber >> docsPerChunkShift; + int docInBlockId = docNumber % docsPerChunk; + assert docInBlockId < docsPerChunk; + + // already read and uncompressed? + if (blockId != lastBlockId) { + lastBlockId = blockId; + long blockStartOffset = addresses.get(blockId); + compressedData.seek(blockStartOffset); + + int uncompressedBlockLength = 0; + + int onlyLength = -1; + for (int i = 0; i < docsPerChunk; i++) { + if (i == 0) { + // The first length value is special. 
It is shifted and has a bit to denote if + // all other values are the same length + int lengthPlusSameInd = compressedData.readVInt(); + int sameIndicator = lengthPlusSameInd & 1; + int firstValLength = lengthPlusSameInd >>> 1; + if (sameIndicator == 1) { + onlyLength = firstValLength; + } + uncompressedBlockLength += firstValLength; + } else { + if (onlyLength == -1) { + // Various lengths are stored - read each from disk + uncompressedBlockLength += compressedData.readVInt(); + } else { + // Only one length + uncompressedBlockLength += onlyLength; + } + } + uncompressedDocStarts[i + 1] = uncompressedBlockLength; + } + + if (uncompressedBlockLength == 0) { + uncompressedBytesRef.offset = 0; + uncompressedBytesRef.length = 0; + return uncompressedBytesRef; + } + + assert uncompressedBlockLength <= uncompressedBlock.length; + LZ4.decompress(EndiannessReverserUtil.wrapDataInput(compressedData), uncompressedBlockLength, uncompressedBlock, 0); + } + + uncompressedBytesRef.offset = uncompressedDocStarts[docInBlockId]; + uncompressedBytesRef.length = uncompressedDocStarts[docInBlockId + 1] - uncompressedBytesRef.offset; + return uncompressedBytesRef; + } + } + // END: Copied fom LUCENE-9211 private abstract static class DenseBinaryDocValues extends BinaryDocValues implements BlockLoader.OptionalColumnAtATimeReader { final int maxDoc; @@ -1129,7 +1265,7 @@ static int primarySortFieldNumber(SegmentInfo segmentInfo, FieldInfos fieldInfos return -1; } - private void readFields(IndexInput meta, FieldInfos infos) throws IOException { + private void readFields(IndexInput meta, FieldInfos infos, int version) throws IOException { for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { FieldInfo info = infos.fieldInfo(fieldNumber); if (info == null) { @@ -1142,7 +1278,7 @@ private void readFields(IndexInput meta, FieldInfos infos) throws IOException { if (type == ES819TSDBDocValuesFormat.NUMERIC) { numerics.put(info.number, readNumeric(meta)); } 
else if (type == ES819TSDBDocValuesFormat.BINARY) { - binaries.put(info.number, readBinary(meta)); + binaries.put(info.number, readBinary(meta, version)); } else if (type == ES819TSDBDocValuesFormat.SORTED) { sorted.put(info.number, readSorted(meta)); } else if (type == ES819TSDBDocValuesFormat.SORTED_SET) { @@ -1204,8 +1340,15 @@ private static void readNumeric(IndexInput meta, NumericEntry entry) throws IOEx entry.denseRankPower = meta.readByte(); } - private BinaryEntry readBinary(IndexInput meta) throws IOException { - final BinaryEntry entry = new BinaryEntry(); + private BinaryEntry readBinary(IndexInput meta, int version) throws IOException { + final BinaryDVCompressionMode compression; + if (version >= ES819TSDBDocValuesFormat.VERSION_BINARY_DV_COMPRESSION) { + compression = BinaryDVCompressionMode.fromMode(meta.readByte()); + } else { + compression = BinaryDVCompressionMode.NO_COMPRESS; + } + final BinaryEntry entry = new BinaryEntry(compression); + entry.dataOffset = meta.readLong(); entry.dataLength = meta.readLong(); entry.docsWithFieldOffset = meta.readLong(); @@ -1888,6 +2031,8 @@ static class NumericEntry { } static class BinaryEntry { + final BinaryDVCompressionMode compression; + long dataOffset; long dataLength; long docsWithFieldOffset; @@ -1899,7 +2044,14 @@ static class BinaryEntry { int maxLength; long addressesOffset; long addressesLength; + // compression mode + int maxUncompressedChunkSize; + int docsPerChunkShift; DirectMonotonicReader.Meta addressesMeta; + + BinaryEntry(BinaryDVCompressionMode compression) { + this.compression = compression; + } } static class SortedNumericEntry extends NumericEntry { diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java index ee9351ed51b97..0e10ae7cdbf22 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java +++ 
b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java @@ -44,6 +44,11 @@ public class DocValuesCodecDuelTests extends ESTestCase { private static final String FIELD_4 = "number_field_4"; private static final String FIELD_5 = "binary_field_5"; + public static BinaryDVCompressionMode randomCompressionMode() { + BinaryDVCompressionMode[] modes = BinaryDVCompressionMode.values(); + return modes[random().nextInt(modes.length)]; + } + @SuppressWarnings("checkstyle:LineLength") public void testDuel() throws IOException { try (var baselineDirectory = newDirectory(); var contenderDirectory = newDirectory()) { @@ -61,7 +66,8 @@ public void testDuel() throws IOException { ? new ES819TSDBDocValuesFormat( ESTestCase.randomIntBetween(1, 4096), ESTestCase.randomIntBetween(1, 512), - random().nextBoolean() + random().nextBoolean(), + randomCompressionMode() ) : new TestES87TSDBDocValuesFormat(); diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java index 003124ab4b6f4..ee638c7697a84 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java @@ -42,6 +42,7 @@ import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec; import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; +import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode; import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseDenseNumericValues; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseSortedDocValues; @@ -70,10 +71,12 @@ public class ES819TSDBDocValuesFormatTests extends 
ES87TSDBDocValuesFormatTests private final Codec codec = new Elasticsearch92Lucene103Codec() { + BinaryDVCompressionMode[] modes = BinaryDVCompressionMode.values(); final ES819TSDBDocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat( ESTestCase.randomIntBetween(2, 4096), ESTestCase.randomIntBetween(1, 512), - random().nextBoolean() + random().nextBoolean(), + modes[random().nextInt(modes.length)] ); @Override From 57c2f248a6200f754356cff38ffbfa5e190e2713 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Mon, 6 Oct 2025 15:01:24 -0500 Subject: [PATCH 12/15] Fix issue with readBinary --- .../es819/ES819TSDBDocValuesProducer.java | 36 ++++++++++++++----- .../codec/tsdb/DocValuesCodecDuelTests.java | 8 ++--- .../codec/tsdb/TsdbDocValueBwcTests.java | 4 ++- .../es819/ES819TSDBDocValuesFormatTests.java | 9 +++-- ...ValuesFormatVariableSkipIntervalTests.java | 4 +-- 5 files changed, 41 insertions(+), 20 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index f20dc498d346f..5ee52f7a4ab8f 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -9,6 +9,7 @@ package org.elasticsearch.index.codec.tsdb.es819; +import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.lucene90.IndexedDISI; @@ -337,6 +338,11 @@ private BinaryDocValues getCompressedBinary(BinaryEntry entry) throws IOExceptio public BytesRef binaryValue() throws IOException { return decoder.decode(doc); } + + @Override + public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset, boolean nullsFiltered, 
BlockDocValuesReader.ToDouble toDouble, boolean toInt) throws IOException { + return null; + } }; } else { // sparse @@ -1358,15 +1364,27 @@ private BinaryEntry readBinary(IndexInput meta, int version) throws IOException entry.numDocsWithField = meta.readInt(); entry.minLength = meta.readInt(); entry.maxLength = meta.readInt(); - if (entry.minLength < entry.maxLength) { - entry.addressesOffset = meta.readLong(); - - // Old count of uncompressed addresses - long numAddresses = entry.numDocsWithField + 1L; - - final int blockShift = meta.readVInt(); - entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, numAddresses, blockShift); - entry.addressesLength = meta.readLong(); + if (compression == BinaryDVCompressionMode.NO_COMPRESS) { + if (entry.minLength < entry.maxLength) { + entry.addressesOffset = meta.readLong(); + // Old count of uncompressed addresses + long numAddresses = entry.numDocsWithField + 1L; + final int blockShift = meta.readVInt(); + entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, numAddresses, blockShift); + entry.addressesLength = meta.readLong(); + } + } else { + if (entry.numDocsWithField > 0 || entry.minLength < entry.maxLength) { + entry.addressesOffset = meta.readLong(); + // New count of compressed addresses - the number of compresseed blocks + int numCompressedChunks = meta.readVInt(); + entry.docsPerChunkShift = meta.readVInt(); + entry.maxUncompressedChunkSize = meta.readVInt(); + + final int blockShift = meta.readVInt(); + entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, numCompressedChunks, blockShift); + entry.addressesLength = meta.readLong(); + } } return entry; } diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java index 0e10ae7cdbf22..1efe9c3f0bf36 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java +++ 
b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java @@ -29,6 +29,7 @@ import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests.TestES87TSDBDocValuesFormat; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat; +import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormatTests; import org.elasticsearch.test.ESTestCase; import java.io.IOException; @@ -44,11 +45,6 @@ public class DocValuesCodecDuelTests extends ESTestCase { private static final String FIELD_4 = "number_field_4"; private static final String FIELD_5 = "binary_field_5"; - public static BinaryDVCompressionMode randomCompressionMode() { - BinaryDVCompressionMode[] modes = BinaryDVCompressionMode.values(); - return modes[random().nextInt(modes.length)]; - } - @SuppressWarnings("checkstyle:LineLength") public void testDuel() throws IOException { try (var baselineDirectory = newDirectory(); var contenderDirectory = newDirectory()) { @@ -67,7 +63,7 @@ public void testDuel() throws IOException { ESTestCase.randomIntBetween(1, 4096), ESTestCase.randomIntBetween(1, 512), random().nextBoolean(), - randomCompressionMode() + ES819TSDBDocValuesFormatTests.randomCompressionMode() ) : new TestES87TSDBDocValuesFormat(); diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java index d2c8aae601977..23f85da450431 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java @@ -42,6 +42,7 @@ import org.elasticsearch.index.codec.perfield.XPerFieldDocValuesFormat; import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests.TestES87TSDBDocValuesFormat; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat; 
+import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormatTests; import org.elasticsearch.test.ESTestCase; import org.hamcrest.Matchers; @@ -291,7 +292,8 @@ public void testEncodeOrdinalRange() throws IOException { new ES819TSDBDocValuesFormat( random().nextInt(16, 128), nextOrdinalRangeThreshold.getAsInt(), - random().nextBoolean() + random().nextBoolean(), + ES819TSDBDocValuesFormatTests.randomCompressionMode() ) ) ); diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java index ee638c7697a84..eb4755cd9b0f3 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java @@ -43,6 +43,7 @@ import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec; import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode; +import org.elasticsearch.index.codec.tsdb.DocValuesCodecDuelTests; import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseDenseNumericValues; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseSortedDocValues; @@ -69,14 +70,18 @@ public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests { + public static BinaryDVCompressionMode randomCompressionMode() { + BinaryDVCompressionMode[] modes = BinaryDVCompressionMode.values(); + return modes[random().nextInt(modes.length)]; + } + private final Codec codec = new Elasticsearch92Lucene103Codec() { - BinaryDVCompressionMode[] modes = BinaryDVCompressionMode.values(); final ES819TSDBDocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat( ESTestCase.randomIntBetween(2, 4096), 
ESTestCase.randomIntBetween(1, 512), random().nextBoolean(), - modes[random().nextInt(modes.length)] + randomCompressionMode() ); @Override diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java index 247b75f2977b5..0ecb042b3e215 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java @@ -19,14 +19,14 @@ public class ES819TSDBDocValuesFormatVariableSkipIntervalTests extends ES87TSDBD protected Codec getCodec() { // small interval size to test with many intervals return TestUtil.alwaysDocValuesFormat( - new ES819TSDBDocValuesFormat(random().nextInt(4, 16), random().nextInt(1, 32), random().nextBoolean()) + new ES819TSDBDocValuesFormat(random().nextInt(4, 16), random().nextInt(1, 32), random().nextBoolean(), ES819TSDBDocValuesFormatTests.randomCompressionMode()) ); } public void testSkipIndexIntervalSize() { IllegalArgumentException ex = expectThrows( IllegalArgumentException.class, - () -> new ES819TSDBDocValuesFormat(random().nextInt(Integer.MIN_VALUE, 2), random().nextInt(1, 32), random().nextBoolean()) + () -> new ES819TSDBDocValuesFormat(random().nextInt(Integer.MIN_VALUE, 2), random().nextInt(1, 32), random().nextBoolean(), ES819TSDBDocValuesFormatTests.randomCompressionMode()) ); assertTrue(ex.getMessage().contains("skipIndexIntervalSize must be > 1")); } From 48e15a881118752478e05fa47206baea9bfee588 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Mon, 6 Oct 2025 15:06:18 -0500 Subject: [PATCH 13/15] Restrict compression to keyword binary doc values --- .../index/codec/PerFieldFormatSupplier.java | 12 +++++++++++- .../codec/tsdb/es819/ES819TSDBDocValuesFormat.java | 4 ++++ 
2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java index 2ed1aa6c9f17f..6d4cc5cc1276a 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java @@ -20,9 +20,11 @@ import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat; import org.elasticsearch.index.codec.postings.ES812PostingsFormat; +import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat; import org.elasticsearch.index.mapper.CompletionFieldMapper; import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.SeqNoFieldMapper; @@ -57,7 +59,8 @@ public class PerFieldFormatSupplier { private static final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat(); private static final KnnVectorsFormat knnVectorsFormat = new Lucene99HnswVectorsFormat(); - private static final ES819TSDBDocValuesFormat tsdbDocValuesFormat = new ES819TSDBDocValuesFormat(); + private static final ES819TSDBDocValuesFormat tsdbDocValuesFormat = new ES819TSDBDocValuesFormat(BinaryDVCompressionMode.NO_COMPRESS); + private static final DocValuesFormat compressedBinaryDocValuesFormat = new ES819TSDBDocValuesFormat(BinaryDVCompressionMode.COMPRESSED_WITH_LZ4); private static final ES812PostingsFormat es812PostingsFormat = new ES812PostingsFormat(); private static final PostingsFormat completionPostingsFormat = PostingsFormat.forName("Completion101"); @@ -127,6 +130,13 @@ public KnnVectorsFormat 
getKnnVectorsFormatForField(String field) { } public DocValuesFormat getDocValuesFormatForField(String field) { + if (mapperService != null) { + Mapper mapper = mapperService.mappingLookup().getMapper(field); + if (mapper != null && KeywordFieldMapper.CONTENT_TYPE.equals(mapper.typeName())) { + return compressedBinaryDocValuesFormat; + } + } + if (useTSDBDocValuesFormat(field)) { return tsdbDocValuesFormat; } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index 63029f382caf3..fd776c8c73e74 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -128,6 +128,10 @@ public ES819TSDBDocValuesFormat() { this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT, BinaryDVCompressionMode.COMPRESSED_WITH_LZ4); } + public ES819TSDBDocValuesFormat(BinaryDVCompressionMode binaryDVCompressionMode) { + this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT, binaryDVCompressionMode); + } + /** Doc values fields format with specified skipIndexIntervalSize. 
*/ public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForRangeEncoding, boolean enableOptimizedMerge, BinaryDVCompressionMode binaryDVCompressionMode) { super(CODEC_NAME); From 1a48f0414b3f3b1d3f2e0086e7ce8cf2f66ae1d3 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 7 Oct 2025 14:09:01 +0000 Subject: [PATCH 14/15] [CI] Auto commit changes from spotless --- .../index/codec/PerFieldFormatSupplier.java | 4 +++- .../tsdb/es819/ES819TSDBDocValuesFormat.java | 21 ++++++++++++++++--- .../es819/ES819TSDBDocValuesProducer.java | 10 ++++++++- .../es819/ES819TSDBDocValuesFormatTests.java | 1 - ...ValuesFormatVariableSkipIntervalTests.java | 14 +++++++++++-- 5 files changed, 42 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java index 6d4cc5cc1276a..b0b6c72b3ef81 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java @@ -60,7 +60,9 @@ public class PerFieldFormatSupplier { private static final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat(); private static final KnnVectorsFormat knnVectorsFormat = new Lucene99HnswVectorsFormat(); private static final ES819TSDBDocValuesFormat tsdbDocValuesFormat = new ES819TSDBDocValuesFormat(BinaryDVCompressionMode.NO_COMPRESS); - private static final DocValuesFormat compressedBinaryDocValuesFormat = new ES819TSDBDocValuesFormat(BinaryDVCompressionMode.COMPRESSED_WITH_LZ4); + private static final DocValuesFormat compressedBinaryDocValuesFormat = new ES819TSDBDocValuesFormat( + BinaryDVCompressionMode.COMPRESSED_WITH_LZ4 + ); private static final ES812PostingsFormat es812PostingsFormat = new ES812PostingsFormat(); private static final PostingsFormat completionPostingsFormat = PostingsFormat.forName("Completion101"); 
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index fd776c8c73e74..59eadd7825ec2 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -125,15 +125,30 @@ private static boolean getOptimizedMergeEnabledDefault() { /** Default constructor. */ public ES819TSDBDocValuesFormat() { - this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT, BinaryDVCompressionMode.COMPRESSED_WITH_LZ4); + this( + DEFAULT_SKIP_INDEX_INTERVAL_SIZE, + ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, + OPTIMIZED_MERGE_ENABLE_DEFAULT, + BinaryDVCompressionMode.COMPRESSED_WITH_LZ4 + ); } public ES819TSDBDocValuesFormat(BinaryDVCompressionMode binaryDVCompressionMode) { - this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT, binaryDVCompressionMode); + this( + DEFAULT_SKIP_INDEX_INTERVAL_SIZE, + ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, + OPTIMIZED_MERGE_ENABLE_DEFAULT, + binaryDVCompressionMode + ); } /** Doc values fields format with specified skipIndexIntervalSize. 
*/ - public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForRangeEncoding, boolean enableOptimizedMerge, BinaryDVCompressionMode binaryDVCompressionMode) { + public ES819TSDBDocValuesFormat( + int skipIndexIntervalSize, + int minDocsPerOrdinalForRangeEncoding, + boolean enableOptimizedMerge, + BinaryDVCompressionMode binaryDVCompressionMode + ) { super(CODEC_NAME); this.binaryDVCompressionMode = binaryDVCompressionMode; if (skipIndexIntervalSize < 2) { diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 5ee52f7a4ab8f..00e5667a5d177 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -340,7 +340,14 @@ public BytesRef binaryValue() throws IOException { } @Override - public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset, boolean nullsFiltered, BlockDocValuesReader.ToDouble toDouble, boolean toInt) throws IOException { + public BlockLoader.Block tryRead( + BlockLoader.BlockFactory factory, + BlockLoader.Docs docs, + int offset, + boolean nullsFiltered, + BlockDocValuesReader.ToDouble toDouble, + boolean toInt + ) throws IOException { return null; } }; @@ -450,6 +457,7 @@ BytesRef decode(int docNumber) throws IOException { return uncompressedBytesRef; } } + // END: Copied fom LUCENE-9211 private abstract static class DenseBinaryDocValues extends BinaryDocValues implements BlockLoader.OptionalColumnAtATimeReader { diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java index eb4755cd9b0f3..e37ec4e1b95c5 100644 --- 
a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java @@ -43,7 +43,6 @@ import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec; import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode; -import org.elasticsearch.index.codec.tsdb.DocValuesCodecDuelTests; import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseDenseNumericValues; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseSortedDocValues; diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java index 0ecb042b3e215..2587942febb14 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java @@ -19,14 +19,24 @@ public class ES819TSDBDocValuesFormatVariableSkipIntervalTests extends ES87TSDBD protected Codec getCodec() { // small interval size to test with many intervals return TestUtil.alwaysDocValuesFormat( - new ES819TSDBDocValuesFormat(random().nextInt(4, 16), random().nextInt(1, 32), random().nextBoolean(), ES819TSDBDocValuesFormatTests.randomCompressionMode()) + new ES819TSDBDocValuesFormat( + random().nextInt(4, 16), + random().nextInt(1, 32), + random().nextBoolean(), + ES819TSDBDocValuesFormatTests.randomCompressionMode() + ) ); } public void testSkipIndexIntervalSize() { IllegalArgumentException ex = expectThrows( IllegalArgumentException.class, - () -> new 
ES819TSDBDocValuesFormat(random().nextInt(Integer.MIN_VALUE, 2), random().nextInt(1, 32), random().nextBoolean(), ES819TSDBDocValuesFormatTests.randomCompressionMode()) + () -> new ES819TSDBDocValuesFormat( + random().nextInt(Integer.MIN_VALUE, 2), + random().nextInt(1, 32), + random().nextBoolean(), + ES819TSDBDocValuesFormatTests.randomCompressionMode() + ) ); assertTrue(ex.getMessage().contains("skipIndexIntervalSize must be > 1")); } From 0bdb7850187de7756564b61c1fce0cf54b41a4dc Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Tue, 7 Oct 2025 10:24:08 -0500 Subject: [PATCH 15/15] Fix compilation error in benchmark --- .../index/codec/tsdb/TSDBDocValuesMergeBenchmark.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java index a3b2fd3633adf..94a080db539dc 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java @@ -27,6 +27,7 @@ import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.common.logging.LogConfigurator; import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; +import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -257,7 +258,7 @@ private static IndexWriterConfig createIndexWriterConfig(boolean optimizedMergeE ); config.setLeafSorter(DataStream.TIMESERIES_LEAF_READERS_SORTER); config.setMergePolicy(new LogByteSizeMergePolicy()); - var docValuesFormat = new ES819TSDBDocValuesFormat(4096, 512, optimizedMergeEnabled); + var docValuesFormat = new 
ES819TSDBDocValuesFormat(4096, 512, optimizedMergeEnabled, BinaryDVCompressionMode.COMPRESSED_WITH_LZ4); config.setCodec(new Elasticsearch92Lucene103Codec() { @Override public DocValuesFormat getDocValuesFormatForField(String field) {