From e3509c8a8e280d6258aedeee303909eb36378e2a Mon Sep 17 00:00:00 2001
From: Martijn van Groningen <martijn.v.groningen@gmail.com>
Date: Mon, 6 Oct 2025 12:09:19 +0200
Subject: [PATCH 01/15] Introduce `index.mapping.use_binary_doc_values` to
 experiment with binary doc values for keyword field.

---
 .../esql/ValuesSourceReaderBenchmark.java     |   4 +-
 .../extras/MatchOnlyTextFieldTypeTests.java   |   8 +-
 .../common/settings/IndexScopedSettings.java  |   1 +
 .../elasticsearch/index/IndexSettings.java    |   7 +
 .../index/mapper/BlockDocValuesReader.java    |  22 +++
 .../index/mapper/KeywordFieldMapper.java      | 157 +++++++++++++++---
 .../index/mapper/KeywordFieldTypeTests.java   |  21 ++-
 .../index/mapper/TextFieldTypeTests.java      |   6 +-
 .../ValueSourceReaderTypeConversionTests.java |   4 +-
 .../read/ValuesSourceReaderOperatorTests.java |   4 +-
 .../planner/EsPhysicalOperationProviders.java |   3 +-
 .../test/keyword_use_binary_doc_values.yml    | 117 +++++++++++++
 12 files changed, 307 insertions(+), 47 deletions(-)
 create mode 100644 x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/keyword_use_binary_doc_values.yml

diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java
index 9cc304828a8a7..b9756c267e0b5 100644
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java
@@ -222,8 +222,8 @@ private static BlockLoader blockLoader(String name) {
                 Lucene.KEYWORD_ANALYZER,
                 Lucene.KEYWORD_ANALYZER,
                 new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(ft.docValuesType() != DocValuesType.NONE),
-                syntheticSource
-            ).blockLoader(new MappedFieldType.BlockLoaderContext() {
+                syntheticSource,
+                false).blockLoader(new MappedFieldType.BlockLoaderContext() {
                 @Override
                 public String indexName() {
                     return "benchmark";
diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java
index 8e70945fc2a76..bbbdacea062e3 100644
--- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java
+++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java
@@ -298,8 +298,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet
             mock(NamedAnalyzer.class),
             mock(NamedAnalyzer.class),
             builder,
-            true
-        );
+            true,
+            false);
 
         MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
             "parent",
@@ -346,8 +346,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet
             mock(NamedAnalyzer.class),
             mock(NamedAnalyzer.class),
             builder,
-            true
-        );
+            true,
+            false);
 
         MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
             "parent",
diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java
index d4fef4e9bb489..9de1f103d85f5 100644
--- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java
+++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java
@@ -243,6 +243,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
         if (IndexSettings.DOC_VALUES_SKIPPER) {
             settings.add(IndexSettings.USE_DOC_VALUES_SKIPPER);
         }
+        settings.add(IndexSettings.USE_BINARY_DOC_VALUES);
         settings.add(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING);
         BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(settings);
     };
diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java
index b396e1ca206e3..de73e65a4696f 100644
--- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java
+++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java
@@ -675,6 +675,13 @@ public boolean isES87TSDBCodecEnabled() {
         Property.Final
     );
 
+    public static final Setting<Boolean> USE_BINARY_DOC_VALUES = Setting.boolSetting(
+        "index.mapping.use_binary_doc_values",
+        false,
+        Property.IndexScope,
+        Property.Final
+    );
+
     /**
      * The {@link IndexMode "mode"} of the index.
      */
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java
index 457c90383b5d2..795401dd3e3e3 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java
@@ -925,6 +925,28 @@ public AllReader reader(LeafReaderContext context) throws IOException {
         }
     }
 
+    /** Block loader that builds bytes-ref blocks and reads them from the {@link BinaryDocValues} of one field. */
+    public static class BytesRefsFromBinaryBlockLoader extends DocValuesBlockLoader {
+        private final String fieldName;
+
+        public BytesRefsFromBinaryBlockLoader(String fieldName) {
+            this.fieldName = fieldName;
+        }
+
+        @Override
+        public Builder builder(BlockFactory factory, int expectedCount) {
+            return factory.bytesRefs(expectedCount);
+        }
+
+        @Override
+        public AllReader reader(LeafReaderContext context) throws IOException {
+            // The leaf reader hands back null when this segment has no binary doc values for the field;
+            // in that case fall back to the constant-nulls reader instead of wrapping a missing instance.
+            final BinaryDocValues binaryValues = context.reader().getBinaryDocValues(fieldName);
+            return binaryValues == null ? new ConstantNullsReader() : new BytesRefsFromBinary(binaryValues);
+        }
+    }
+
     abstract static class AbstractBytesRefsFromBinary extends BlockDocValuesReader {
         protected final BinaryDocValues docValues;
 
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
index cf3fad86812f5..eee62b72ccac8 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -13,15 +13,18 @@
 import org.apache.logging.log4j.Logger;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.InvertableType;
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DocValuesSkipIndexType;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.Term;
@@ -90,6 +93,7 @@
 import static org.apache.lucene.index.IndexWriter.MAX_TERM_LENGTH;
 import static org.elasticsearch.core.Strings.format;
 import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING;
+import static org.elasticsearch.index.IndexSettings.USE_BINARY_DOC_VALUES;
 import static org.elasticsearch.index.IndexSettings.USE_DOC_VALUES_SKIPPER;
 import static org.elasticsearch.index.mapper.FieldArrayContext.getOffsetsFieldName;
 import static org.elasticsearch.index.mapper.Mapper.IgnoreAbove.getIgnoreAboveDefaultValue;
@@ -108,6 +112,8 @@ public static class Defaults {
         public static final FieldType FIELD_TYPE;
         public static final FieldType FIELD_TYPE_WITH_SKIP_DOC_VALUES;
 
+        public static final FieldType FIELD_TYPE_WITH_BINARY_DOC_VALUES;
+
         static {
             FieldType ft = new FieldType();
             ft.setTokenized(false);
@@ -127,6 +133,15 @@ public static class Defaults {
             FIELD_TYPE_WITH_SKIP_DOC_VALUES = freezeAndDeduplicateFieldType(ft);
         }
 
+        static {
+            FieldType ft = new FieldType();
+            ft.setTokenized(false);
+            ft.setOmitNorms(true);
+            ft.setIndexOptions(IndexOptions.NONE);
+            ft.setDocValuesType(DocValuesType.BINARY);
+            FIELD_TYPE_WITH_BINARY_DOC_VALUES = freezeAndDeduplicateFieldType(ft);
+        }
+
         public static final TextSearchInfo TEXT_SEARCH_INFO = new TextSearchInfo(
             FIELD_TYPE,
             null,
@@ -214,6 +229,7 @@ public static final class Builder extends FieldMapper.DimensionBuilder {
         private final boolean forceDocValuesSkipper;
         private final SourceKeepMode indexSourceKeepMode;
         private final boolean isWithinMultiField;
+        private final boolean useBinaryDocValues;
 
         public Builder(final String name, final MappingParserContext mappingParserContext) {
             this(
@@ -227,7 +243,8 @@ public Builder(final String name, final MappingParserContext mappingParserContex
                 USE_DOC_VALUES_SKIPPER.get(mappingParserContext.getSettings()),
                 false,
                 mappingParserContext.getIndexSettings().sourceKeepMode(),
-                mappingParserContext.isWithinMultiField()
+                mappingParserContext.isWithinMultiField(),
+                USE_BINARY_DOC_VALUES.get(mappingParserContext.getSettings())
             );
         }
 
@@ -250,7 +267,8 @@ public Builder(final String name, final MappingParserContext mappingParserContex
                 false,
                 false,
                 sourceKeepMode,
-                isWithinMultiField
+                isWithinMultiField,
+                false
             );
         }
 
@@ -265,7 +283,8 @@ private Builder(
             boolean enableDocValuesSkipper,
             boolean forceDocValuesSkipper,
             SourceKeepMode indexSourceKeepMode,
-            boolean isWithinMultiField
+            boolean isWithinMultiField,
+            boolean binaryDocValuesEnabled
         ) {
             super(name);
             this.indexAnalyzers = indexAnalyzers;
@@ -301,6 +320,7 @@ private Builder(
             this.forceDocValuesSkipper = forceDocValuesSkipper;
             this.indexSourceKeepMode = indexSourceKeepMode;
             this.isWithinMultiField = isWithinMultiField;
+            this.useBinaryDocValues = binaryDocValuesEnabled;
         }
 
         public Builder(String name, IndexVersion indexCreatedVersion) {
@@ -330,7 +350,8 @@ public static Builder buildWithDocValuesSkipper(
                 enableDocValuesSkipper,
                 true,
                 SourceKeepMode.NONE,
-                isWithinMultiField
+                isWithinMultiField,
+                false
             );
         }
 
@@ -413,7 +434,7 @@ protected Parameter<?>[] getParameters() {
                 dimension };
         }
 
-        private KeywordFieldType buildFieldType(MapperBuilderContext context, FieldType fieldType) {
+        private KeywordFieldType buildFieldType(MapperBuilderContext context, FieldType fieldType, boolean useBinaryDocValues) {
             NamedAnalyzer normalizer = Lucene.KEYWORD_ANALYZER;
             NamedAnalyzer searchAnalyzer = Lucene.KEYWORD_ANALYZER;
             NamedAnalyzer quoteAnalyzer = Lucene.KEYWORD_ANALYZER;
@@ -448,20 +469,25 @@ private KeywordFieldType buildFieldType(MapperBuilderContext context, FieldType
                 searchAnalyzer,
                 quoteAnalyzer,
                 this,
-                context.isSourceSynthetic()
+                context.isSourceSynthetic(),
+                useBinaryDocValues
             );
         }
 
         @Override
         public KeywordFieldMapper build(MapperBuilderContext context) {
+            String fullName = context.buildFullName(leafName());
+            // Needs doc values enabled; host.name is excluded as index sorting by binary doc values is not supported (yet):
+            boolean useBinaryDocValues = this.useBinaryDocValues && hasDocValues.getValue() && fullName.equals("host.name") == false;
             FieldType fieldtype = resolveFieldType(
+                useBinaryDocValues,
                 enableDocValuesSkipper,
                 forceDocValuesSkipper,
                 hasDocValues,
                 indexCreatedVersion,
                 indexSortConfig,
                 indexMode,
-                context.buildFullName(leafName())
+                fullName
             );
             fieldtype.setOmitNorms(this.hasNorms.getValue() == false);
             fieldtype.setStored(this.stored.getValue());
@@ -492,15 +518,17 @@ public KeywordFieldMapper build(MapperBuilderContext context) {
             return new KeywordFieldMapper(
                 leafName(),
                 fieldtype,
-                buildFieldType(context, fieldtype),
+                buildFieldType(context, fieldtype, useBinaryDocValues),
                 builderParams(this, context),
                 this,
                 offsetsFieldName,
-                indexSourceKeepMode
+                indexSourceKeepMode,
+                useBinaryDocValues
             );
         }
 
         private static FieldType resolveFieldType(
+            final boolean useBinaryDocValues,
             final boolean enableDocValuesSkipper,
             final boolean forceDocValuesSkipper,
             final Parameter<Boolean> hasDocValues,
@@ -509,6 +537,10 @@ private static FieldType resolveFieldType(
             final IndexMode indexMode,
             final String fullFieldName
         ) {
+            if (useBinaryDocValues) {
+                return new FieldType(Defaults.FIELD_TYPE_WITH_BINARY_DOC_VALUES);
+            }
+
             if (enableDocValuesSkipper) {
                 if (forceDocValuesSkipper) {
                     assert hasDocValues.getValue();
@@ -553,6 +585,7 @@ public static final class KeywordFieldType extends TextFamilyFieldType {
         private final boolean isDimension;
         private final IndexSortConfig indexSortConfig;
         private final boolean hasDocValuesSkipper;
+        private final boolean useBinaryDocValues;
 
         public KeywordFieldType(
             String name,
@@ -561,7 +594,8 @@ public KeywordFieldType(
             NamedAnalyzer searchAnalyzer,
             NamedAnalyzer quoteAnalyzer,
             Builder builder,
-            boolean isSyntheticSource
+            boolean isSyntheticSource,
+            boolean useBinaryDocValues
         ) {
             super(
                 name,
@@ -581,6 +615,7 @@ public KeywordFieldType(
             this.isDimension = builder.dimension.getValue();
             this.indexSortConfig = builder.indexSortConfig;
             this.hasDocValuesSkipper = DocValuesSkipIndexType.NONE.equals(fieldType.docValuesSkipIndexType()) == false;
+            this.useBinaryDocValues = useBinaryDocValues;
         }
 
         public KeywordFieldType(String name) {
@@ -597,6 +632,7 @@ public KeywordFieldType(String name, boolean isIndexed, boolean hasDocValues, Ma
             this.isDimension = false;
             this.indexSortConfig = null;
             this.hasDocValuesSkipper = false;
+            this.useBinaryDocValues = false;
         }
 
         public KeywordFieldType(String name, FieldType fieldType) {
@@ -618,6 +654,7 @@ public KeywordFieldType(String name, FieldType fieldType) {
             this.isDimension = false;
             this.indexSortConfig = null;
             this.hasDocValuesSkipper = DocValuesSkipIndexType.NONE.equals(fieldType.docValuesSkipIndexType()) == false;
+            this.useBinaryDocValues = false;
         }
 
         public KeywordFieldType(String name, NamedAnalyzer analyzer) {
@@ -639,6 +676,7 @@ public KeywordFieldType(String name, NamedAnalyzer analyzer) {
             this.isDimension = false;
             this.indexSortConfig = null;
             this.hasDocValuesSkipper = false;
+            this.useBinaryDocValues = false;
         }
 
         @Override
@@ -799,6 +837,10 @@ NamedAnalyzer normalizer() {
 
         @Override
         public BlockLoader blockLoader(BlockLoaderContext blContext) {
+            if (useBinaryDocValues) {
+                return new BlockDocValuesReader.BytesRefsFromBinaryBlockLoader(name());
+            }
+
             if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSourceEnabled())) {
                 return new BlockDocValuesReader.BytesRefsFromOrdsBlockLoader(name());
             }
@@ -1119,6 +1161,7 @@ public Query automatonQuery(
     private final boolean forceDocValuesSkipper;
     private final String offsetsFieldName;
     private final SourceKeepMode indexSourceKeepMode;
+    private final boolean useBinaryDocValues;
 
     private KeywordFieldMapper(
         String simpleName,
@@ -1127,7 +1170,8 @@ private KeywordFieldMapper(
         BuilderParams builderParams,
         Builder builder,
         String offsetsFieldName,
-        SourceKeepMode indexSourceKeepMode
+        SourceKeepMode indexSourceKeepMode,
+        boolean useBinaryDocValues
     ) {
         super(simpleName, mappedFieldType, builderParams);
         assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0;
@@ -1148,6 +1192,7 @@ private KeywordFieldMapper(
         this.forceDocValuesSkipper = builder.forceDocValuesSkipper;
         this.offsetsFieldName = offsetsFieldName;
         this.indexSourceKeepMode = indexSourceKeepMode;
+        this.useBinaryDocValues = useBinaryDocValues;
     }
 
     @Override
@@ -1210,6 +1255,13 @@ private boolean indexValue(DocumentParserContext context, XContentString value)
             return false;
         }
 
+        if (useBinaryDocValues) {
+            var utfBytes = value.bytes();
+            var binaryValue = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
+            context.doc().add(new BinaryDocValuesField(fieldType().name(), binaryValue));
+            return true;
+        }
+
         // if the value's length exceeds ignore_above, then don't index it
         if (fieldType().ignoreAbove().isIgnored(value)) {
             context.addIgnoredField(fullPath());
@@ -1316,7 +1368,8 @@ public FieldMapper.Builder getMergeBuilder() {
             enableDocValuesSkipper,
             forceDocValuesSkipper,
             indexSourceKeepMode,
-            fieldType().isWithinMultiField()
+            fieldType().isWithinMultiField(),
+            useBinaryDocValues
         ).dimension(fieldType().isDimension()).init(this);
     }
 
@@ -1365,22 +1418,26 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException {
                 }
             });
         } else if (hasDocValues) {
-            if (offsetsFieldName != null) {
-                layers.add(new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName));
+            if (useBinaryDocValues) {
+                layers.add(new BinarySyntheticFieldLoader());
             } else {
-                layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) {
-
-                    @Override
-                    protected BytesRef convert(BytesRef value) {
-                        return value;
-                    }
-
-                    @Override
-                    protected BytesRef preserve(BytesRef value) {
-                        // Preserve must make a deep copy because convert gets a shallow copy from the iterator
-                        return BytesRef.deepCopyOf(value);
-                    }
-                });
+                if (offsetsFieldName != null) {
+                    layers.add(new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName));
+                } else {
+                    layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) {
+
+                        @Override
+                        protected BytesRef convert(BytesRef value) {
+                            return value;
+                        }
+
+                        @Override
+                        protected BytesRef preserve(BytesRef value) {
+                            // Preserve must make a deep copy because convert gets a shallow copy from the iterator
+                            return BytesRef.deepCopyOf(value);
+                        }
+                    });
+                }
             }
         }
 
@@ -1399,4 +1456,50 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException {
 
         return new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, layers);
     }
+
+    /** Synthetic source layer that writes back the value (at most one per document) read from this field's binary doc values. */
+    final class BinarySyntheticFieldLoader implements CompositeSyntheticFieldLoader.DocValuesLayer {
+        private BytesRef currentValue;
+        private int currentCount;
+
+        @Override
+        public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
+            final BinaryDocValues binaryValues = leafReader.getBinaryDocValues(fullPath());
+            if (binaryValues == null) {
+                currentCount = 0;
+                return null;
+            }
+            return docId -> {
+                final boolean found = binaryValues.advanceExact(docId);
+                if (found) {
+                    currentValue = BytesRef.deepCopyOf(binaryValues.binaryValue());
+                }
+                currentCount = found ? 1 : 0;
+                return found;
+            };
+        }
+
+        @Override
+        public boolean hasValue() {
+            return currentCount > 0;
+        }
+
+        @Override
+        public long valueCount() {
+            return currentCount;
+        }
+
+        @Override
+        public void write(XContentBuilder b) throws IOException {
+            if (hasValue() == false) {
+                return;
+            }
+            b.utf8Value(currentValue.bytes, currentValue.offset, currentValue.length);
+        }
+
+        @Override
+        public String fieldName() {
+            return fullPath();
+        }
+    }
 }
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java
index 815f19ec2cfb6..ed0038b4645a5 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java
@@ -322,7 +322,8 @@ public void testIgnoreAboveIndexLevelSetting() {
             mock(NamedAnalyzer.class),
             mock(NamedAnalyzer.class),
             builder,
-            true
+            true,
+            false
         );
 
         // when/then
@@ -354,7 +355,8 @@ public void testIgnoreAboveIsSetReturnsTrueWhenIgnoreAboveIsGiven() {
             mock(NamedAnalyzer.class),
             mock(NamedAnalyzer.class),
             builder,
-            true
+            true,
+            false
         );
 
         // when/then
@@ -385,7 +387,8 @@ public void testIgnoreAboveIsSetReturnsFalseWhenIgnoreAboveIsNotGiven() {
             mock(NamedAnalyzer.class),
             mock(NamedAnalyzer.class),
             builder,
-            true
+            true,
+            false
         );
 
         // when/then
@@ -417,7 +420,8 @@ public void testIgnoreAboveIsSetReturnsFalseWhenIgnoreAboveIsGivenButItsTheSameA
             mock(NamedAnalyzer.class),
             mock(NamedAnalyzer.class),
             builder,
-            true
+            true,
+            false
         );
 
         // when/then
@@ -449,7 +453,8 @@ public void testIgnoreAboveIsSetReturnsFalseWhenIgnoreAboveIsGivenButItsTheSameA
             mock(NamedAnalyzer.class),
             mock(NamedAnalyzer.class),
             builder,
-            true
+            true,
+            false
         );
 
         // when/then
@@ -481,7 +486,8 @@ public void testIgnoreAboveIsSetReturnsTrueWhenIgnoreAboveIsGivenAsLogsdbDefault
             mock(NamedAnalyzer.class),
             mock(NamedAnalyzer.class),
             builder,
-            true
+            true,
+            false
         );
 
         // when/then
@@ -513,7 +519,8 @@ public void testIgnoreAboveIsSetReturnsTrueWhenIgnoreAboveIsConfiguredAtIndexLev
             mock(NamedAnalyzer.class),
             mock(NamedAnalyzer.class),
             builder,
-            true
+            true,
+            false
         );
 
         // when/then
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java
index 82f1d3a0b687c..62dfb2d90fc68 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java
@@ -357,7 +357,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet
             mock(NamedAnalyzer.class),
             mock(NamedAnalyzer.class),
             builder,
-            true
+            true,
+            false
         );
 
         TextFieldType ft = new TextFieldType(
@@ -406,7 +407,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet
             mock(NamedAnalyzer.class),
             mock(NamedAnalyzer.class),
             builder,
-            true
+            true,
+            false
         );
 
         TextFieldType ft = new TextFieldType(
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java
index 4e562217a00e3..1d18d479d3265 100644
--- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java
+++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java
@@ -1393,8 +1393,8 @@ private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) {
             Lucene.KEYWORD_ANALYZER,
             Lucene.KEYWORD_ANALYZER,
             new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false),
-            true // TODO randomize - load from stored keyword fields if stored even in synthetic source
-        );
+            true, // TODO randomize - load from stored keyword fields if stored even in synthetic source
+            false);
     }
 
     @AwaitsFix(bugUrl = "Get working for multiple indices")
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java
index 3a83d365f6f57..dd51c21f1707f 100644
--- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java
+++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java
@@ -1580,8 +1580,8 @@ private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) {
             Lucene.KEYWORD_ANALYZER,
             Lucene.KEYWORD_ANALYZER,
             new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false),
-            true // TODO randomize - load from stored keyword fields if stored even in synthetic source
-        );
+            true, // TODO randomize - load from stored keyword fields if stored even in synthetic source
+            false);
     }
 
     private TextFieldMapper.TextFieldType storedTextField(String name) {
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java
index 29c98a073938e..3cec096140263 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java
@@ -240,7 +240,8 @@ static MappedFieldType createUnmappedFieldType(String name, DefaultShardContext
                 Lucene.KEYWORD_ANALYZER,
                 Lucene.KEYWORD_ANALYZER,
                 builder,
-                context.ctx.isSourceSynthetic()
+                context.ctx.isSourceSynthetic(),
+                false
             );
         }
     }
diff --git a/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/keyword_use_binary_doc_values.yml b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/keyword_use_binary_doc_values.yml
new file mode 100644
index 0000000000000..2f4d22ba233e5
--- /dev/null
+++ b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/keyword_use_binary_doc_values.yml
@@ -0,0 +1,117 @@
+---
+setup:
+  - do:
+      indices.create:
+        index: my-index
+        body:
+          settings:
+            index:
+              mapping:
+                use_binary_doc_values: true # index.mapping.use_binary_doc_values: the setting this suite exercises
+              mode: logsdb
+          mappings:
+            properties:
+              "@timestamp":
+                type: date
+              host.name:
+                type: keyword
+              agent_id:
+                type: keyword
+              process_id:
+                type: integer
+              http_method:
+                type: keyword
+              is_https:
+                type: boolean
+              location:
+                type: geo_point
+              message:
+                type: text
+
+  - do:
+      bulk:
+        index: my-index
+        refresh: true # the tests query right after setup, so the six documents must be searchable immediately
+        body:
+          - { "index": { } }
+          - { "@timestamp": "2024-02-12T10:30:00Z", "host.name": "foo", "agent_id": "darth-vader", "process_id": 101, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "No, I am your father." }
+          - { "index": { } }
+          - { "@timestamp": "2024-02-12T10:31:00Z", "host.name": "bar", "agent_id": "yoda", "process_id": 102, "http_method": "PUT", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "Do. Or do not. There is no try." }
+          - { "index": { } }
+          - { "@timestamp": "2024-02-12T10:32:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 103, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "May the force be with you." }
+          - { "index": { } }
+          - { "@timestamp": "2024-02-12T10:33:00Z", "host.name": "baz", "agent_id": "darth-vader", "process_id": 102, "http_method": "POST", "is_https": true, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "I find your lack of faith disturbing." }
+          - { "index": { } }
+          - { "@timestamp": "2024-02-12T10:34:00Z", "host.name": "baz", "agent_id": "yoda", "process_id": 104, "http_method": "POST", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "Wars not make one great." }
+          - { "index": { } }
+          - { "@timestamp": "2024-02-12T10:35:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 105, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "That's no moon. It's a space station." }
+
+---
+teardown:
+  - do:
+      indices.delete:
+        index: my-index # removes the index that setup created
+
+---
+"Simple from":
+  - do:
+      esql.query:
+        body:
+          query: 'FROM my-index | SORT host.name, @timestamp | LIMIT 1' # "bar" is the smallest host.name, so the 10:31 document is returned
+
+  - match: {columns.0.name: "@timestamp"} # without a KEEP, the columns are asserted in name order
+  - match: {columns.0.type: "date"}
+  - match: {columns.1.name: "agent_id"}
+  - match: {columns.1.type: "keyword"}
+  - match: {columns.2.name: "host.name"}
+  - match: {columns.2.type: "keyword"}
+  - match: {columns.3.name: "http_method" }
+  - match: {columns.3.type: "keyword" }
+  - match: {columns.4.name: "is_https"}
+  - match: {columns.4.type: "boolean"}
+  - match: {columns.5.name: "location"}
+  - match: {columns.5.type: "geo_point"}
+  - match: {columns.6.name: "message"}
+  - match: {columns.6.type: "text"}
+  - match: {columns.7.name: "process_id"}
+  - match: {columns.7.type: "integer"}
+
+  - match: {values.0.0: "2024-02-12T10:31:00.000Z"}
+  - match: {values.0.1: "yoda"}
+  - match: {values.0.2: "bar"}
+  - match: {values.0.3: "PUT"}
+  - match: {values.0.4: false}
+  - match: {values.0.5: "POINT (-74.00600004941225 40.712799984030426)"} # returned point differs slightly from the indexed -74.0060 / 40.7128
+  - match: {values.0.6: "Do. Or do not. There is no try."}
+  - match: {values.0.7: 102}
+
+---
+"Simple from keyword fields":
+  - do:
+      esql.query:
+        body:
+          query: 'FROM my-index | SORT host.name, @timestamp | KEEP agent_id, http_method | LIMIT 10' # sort on two fields, return two keyword fields
+          profile: true # required for the profile.* assertions at the end of this test
+
+  - match: {columns.0.name: "agent_id"}
+  - match: {columns.0.type: "keyword"}
+  - match: {columns.1.name: "http_method"}
+  - match: {columns.1.type: "keyword"}
+
+  - match: {values.0.0: "yoda"} # rows follow host.name then @timestamp: bar, baz, baz, foo, foo, foo
+  - match: {values.0.1: "PUT"}
+  - match: {values.1.0: "darth-vader"}
+  - match: {values.1.1: "POST"}
+  - match: {values.2.0: "yoda"}
+  - match: {values.2.1: "POST"}
+  - match: {values.3.0: "darth-vader"}
+  - match: {values.3.1: "GET"}
+  - match: {values.4.0: "obi-wan"}
+  - match: {values.4.1: "GET"}
+  - match: {values.5.0: "obi-wan"}
+  - match: {values.5.1: "GET"}
+
+  - match: {profile.drivers.0.description: "data"}
+  - match: {profile.drivers.0.operators.1.operator: "ValuesSourceReaderOperator[fields = [@timestamp, agent_id, host.name, http_method]]"} # SORT keys plus KEEP'd fields
+  - match: {profile.drivers.0.operators.1.status.readers_built.agent_id:row_stride:BlockDocValuesReader\\.Bytes: 1} # NOTE(review): presumably the binary doc values reader - confirm
+  - match: {profile.drivers.0.operators.1.status.readers_built.http_method:row_stride:BlockDocValuesReader\\.Bytes: 1}

From c9a3a84d40676700e9b4160a658e8fa7eaced302 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>
Date: Mon, 6 Oct 2025 10:30:40 +0000
Subject: [PATCH 02/15] [CI] Auto commit changes from spotless

---
 .../_nightly/esql/ValuesSourceReaderBenchmark.java          | 3 ++-
 .../index/mapper/extras/MatchOnlyTextFieldTypeTests.java    | 6 ++++--
 .../lucene/read/ValueSourceReaderTypeConversionTests.java   | 3 ++-
 .../lucene/read/ValuesSourceReaderOperatorTests.java        | 3 ++-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java
index b9756c267e0b5..6add290ca52b5 100644
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java
@@ -223,7 +223,8 @@ private static BlockLoader blockLoader(String name) {
                 Lucene.KEYWORD_ANALYZER,
                 new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(ft.docValuesType() != DocValuesType.NONE),
                 syntheticSource,
-                    useBinaryDocValues).blockLoader(new MappedFieldType.BlockLoaderContext() {
+                useBinaryDocValues
+            ).blockLoader(new MappedFieldType.BlockLoaderContext() {
                 @Override
                 public String indexName() {
                     return "benchmark";
diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java
index bbbdacea062e3..41a0559453372 100644
--- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java
+++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java
@@ -299,7 +299,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet
             mock(NamedAnalyzer.class),
             builder,
             true,
-                useBinaryDocValues);
+            useBinaryDocValues
+        );
 
         MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
             "parent",
@@ -347,7 +348,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet
             mock(NamedAnalyzer.class),
             builder,
             true,
-                useBinaryDocValues);
+            useBinaryDocValues
+        );
 
         MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
             "parent",
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java
index 1d18d479d3265..0636b6027b57a 100644
--- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java
+++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java
@@ -1394,7 +1394,8 @@ private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) {
             Lucene.KEYWORD_ANALYZER,
             new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false),
             true, // TODO randomize - load from stored keyword fields if stored even in synthetic source
-                useBinaryDocValues);
+            useBinaryDocValues
+        );
     }
 
     @AwaitsFix(bugUrl = "Get working for multiple indices")
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java
index dd51c21f1707f..2b0db9bfa484b 100644
--- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java
+++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java
@@ -1581,7 +1581,8 @@ private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) {
             Lucene.KEYWORD_ANALYZER,
             new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false),
             true, // TODO randomize - load from stored keyword fields if stored even in synthetic source
-                useBinaryDocValues);
+            useBinaryDocValues
+        );
     }
 
     private TextFieldMapper.TextFieldType storedTextField(String name) {

From d2b30bd592268fd27c1a0d7e691f3f2d358addbf Mon Sep 17 00:00:00 2001
From: Martijn van Groningen <martijn.v.groningen@gmail.com>
Date: Mon, 6 Oct 2025 12:49:13 +0200
Subject: [PATCH 03/15] fix compile error

---
 .../benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java
index 6add290ca52b5..cb929d4437722 100644
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java
@@ -223,7 +223,7 @@ private static BlockLoader blockLoader(String name) {
                 Lucene.KEYWORD_ANALYZER,
                 new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(ft.docValuesType() != DocValuesType.NONE),
                 syntheticSource,
-                useBinaryDocValues
+                false
             ).blockLoader(new MappedFieldType.BlockLoaderContext() {
                 @Override
                 public String indexName() {

From 511b0328825c2bb86987b6a24e6a99f9874ff3dd Mon Sep 17 00:00:00 2001
From: Martijn van Groningen <martijn.v.groningen@gmail.com>
Date: Mon, 6 Oct 2025 12:50:56 +0200
Subject: [PATCH 04/15] Fix clickbench queries with wildcard queries to work
 with binary doc values.

---
 .../index/mapper/KeywordFieldMapper.java      | 13 ++++-
 .../BinaryDocValuesStringFieldScript.java     | 56 +++++++++++++++++++
 2 files changed, 67 insertions(+), 2 deletions(-)
 create mode 100644 server/src/main/java/org/elasticsearch/script/BinaryDocValuesStringFieldScript.java

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
index eee62b72ccac8..3a65ea0fd3312 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -53,12 +53,16 @@
 import org.elasticsearch.index.fielddata.FieldData;
 import org.elasticsearch.index.fielddata.FieldDataContext;
 import org.elasticsearch.index.fielddata.IndexFieldData;
+import org.elasticsearch.index.fielddata.IndexFieldDataCache;
 import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
 import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
+import org.elasticsearch.index.fielddata.plain.BinaryIndexFieldData;
 import org.elasticsearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData;
 import org.elasticsearch.index.query.AutomatonQueryWithDescription;
 import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.index.similarity.SimilarityProvider;
+import org.elasticsearch.indices.breaker.CircuitBreakerService;
+import org.elasticsearch.script.BinaryDocValuesStringFieldScript;
 import org.elasticsearch.script.Script;
 import org.elasticsearch.script.ScriptCompiler;
 import org.elasticsearch.script.SortedSetDocValuesStringFieldScript;
@@ -956,7 +960,10 @@ protected BytesRef storedToBytesRef(Object stored) {
             );
         }
 
-        private SortedSetOrdinalsIndexFieldData.Builder fieldDataFromDocValues() {
+        private IndexFieldData.Builder fieldDataFromDocValues() {
+            if (useBinaryDocValues) {
+                return new BinaryIndexFieldData.Builder(name(), CoreValuesSourceType.KEYWORD);
+            }
             return new SortedSetOrdinalsIndexFieldData.Builder(
                 name(),
                 CoreValuesSourceType.KEYWORD,
@@ -1041,7 +1048,9 @@ public Query wildcardQuery(
                 }
                 return new StringScriptFieldWildcardQuery(
                     new Script(""),
-                    ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx),
+                    ctx -> useBinaryDocValues
+                        ? new BinaryDocValuesStringFieldScript(name(), context.lookup(), ctx)
+                        : new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx),
                     name(),
                     value,
                     caseInsensitive
diff --git a/server/src/main/java/org/elasticsearch/script/BinaryDocValuesStringFieldScript.java b/server/src/main/java/org/elasticsearch/script/BinaryDocValuesStringFieldScript.java
new file mode 100644
index 0000000000000..be5aba49ec820
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/script/BinaryDocValuesStringFieldScript.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.script;
+
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.index.mapper.OnScriptError;
+import org.elasticsearch.search.lookup.SearchLookup;
+
+import java.io.IOException;
+import java.util.Map;
+
+/**
+ * A {@link StringFieldScript} that emits a field's value straight from its {@link BinaryDocValues}.
+ * <p>
+ * The stored bytes are decoded as one UTF-8 string, so at most one value is emitted per document.
+ * NOTE(review): this assumes the field stores a single raw UTF-8 value per document; confirm how
+ * multi-valued fields are encoded before relying on this class for them.
+ */
+public class BinaryDocValuesStringFieldScript extends StringFieldScript {
+    private final BinaryDocValues binaryDocValues;
+
+    /** Whether the document most recently passed to {@link #setDocument(int)} has a value. */
+    private boolean hasValue = false;
+
+    /**
+     * Opens the binary doc values of {@code fieldName} in the given segment.
+     *
+     * @param fieldName    name of the field whose binary doc values are read
+     * @param searchLookup lookup handed to the parent script
+     * @param ctx          segment the doc values are read from
+     * @throws IllegalStateException if the doc values cannot be opened
+     */
+    public BinaryDocValuesStringFieldScript(String fieldName, SearchLookup searchLookup, LeafReaderContext ctx) {
+        super(fieldName, Map.of(), searchLookup, OnScriptError.FAIL, ctx);
+        try {
+            binaryDocValues = DocValues.getBinary(ctx.reader(), fieldName);
+        } catch (IOException e) {
+            throw new IllegalStateException("Cannot load doc values", e);
+        }
+    }
+
+    /** Positions the doc values on {@code docID} and records whether that document has a value. */
+    @Override
+    public void setDocument(int docID) {
+        try {
+            hasValue = binaryDocValues.advanceExact(docID);
+        } catch (IOException e) {
+            throw new IllegalStateException("Cannot load doc values", e);
+        }
+    }
+
+    /** Emits the current document's value decoded as UTF-8, or nothing when the document has no value. */
+    @Override
+    public void execute() {
+        try {
+            if (hasValue) {
+                BytesRef bytesRef = binaryDocValues.binaryValue();
+                emit(bytesRef.utf8ToString());
+            }
+        } catch (IOException e) {
+            throw new IllegalStateException("Cannot load doc values", e);
+        }
+    }
+}

From adfca29fb5741a399096c6781475e2b85a7d20c6 Mon Sep 17 00:00:00 2001
From: Martijn van Groningen <martijn.v.groningen@gmail.com>
Date: Mon, 6 Oct 2025 12:56:50 +0200
Subject: [PATCH 05/15] bulk load dense binary doc values.

---
 .../es819/ES819TSDBDocValuesProducer.java     | 44 ++++++++++++++++++-
 .../index/mapper/BlockDocValuesReader.java    | 11 +++++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
index 5d90f2814853d..e26f8d04965ae 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
@@ -208,6 +208,26 @@ public BytesRef binaryValue() throws IOException {
                         bytesSlice.readBytes((long) doc * length, bytes.bytes, 0, length);
                         return bytes;
                     }
+
+                    @Override
+                    public BlockLoader.Block tryRead( // bulk path: one block holding the values of docs.get(offset) .. docs.get(count - 1)
+                        BlockLoader.BlockFactory factory,
+                        BlockLoader.Docs docs,
+                        int offset,
+                        boolean nullsFiltered, // not read by this implementation
+                        BlockDocValuesReader.ToDouble toDouble, // not read by this implementation
+                        boolean toInt // not read by this implementation
+                    ) throws IOException {
+                        int count = docs.count() - offset; // docs still to read; handed to the builder factory
+                        try (var builder = factory.bytesRefs(count)) {
+                            for (int i = offset; i < docs.count(); i++) {
+                                doc = docs.get(i); // assigns the iterator's doc field, the one binaryValue() reads
+                                bytesSlice.readBytes((long) doc * length, bytes.bytes, 0, length); // fixed length: value starts at doc * length
+                                builder.appendBytesRef(bytes); // NOTE(review): bytes is reused; presumably the builder copies it
+                            }
+                            return builder.build();
+                        }
+                    }
                 };
             } else {
                 // variable length
@@ -223,6 +243,28 @@ public BytesRef binaryValue() throws IOException {
                         bytesSlice.readBytes(startOffset, bytes.bytes, 0, bytes.length);
                         return bytes;
                     }
+
+                    @Override
+                    public BlockLoader.Block tryRead( // bulk path: one block holding the values of docs.get(offset) .. docs.get(count - 1)
+                        BlockLoader.BlockFactory factory,
+                        BlockLoader.Docs docs,
+                        int offset,
+                        boolean nullsFiltered, // not read by this implementation
+                        BlockDocValuesReader.ToDouble toDouble, // not read by this implementation
+                        boolean toInt // not read by this implementation
+                    ) throws IOException {
+                        int count = docs.count() - offset; // docs still to read; handed to the builder factory
+                        try (var builder = factory.bytesRefs(count)) {
+                            for (int i = offset; i < docs.count(); i++) {
+                                doc = docs.get(i); // assigns the iterator's doc field
+                                long startOffset = addresses.get(doc); // addresses[doc] and addresses[doc + 1] delimit this doc's bytes
+                                bytes.length = (int) (addresses.get(doc + 1L) - startOffset);
+                                bytesSlice.readBytes(startOffset, bytes.bytes, 0, bytes.length);
+                                builder.appendBytesRef(bytes); // NOTE(review): bytes is reused; presumably the builder copies it
+                            }
+                            return builder.build();
+                        }
+                    }
                 };
             }
         } else {
@@ -267,7 +309,7 @@ public BytesRef binaryValue() throws IOException {
         }
     }
 
-    private abstract static class DenseBinaryDocValues extends BinaryDocValues {
+    private abstract static class DenseBinaryDocValues extends BinaryDocValues implements BlockLoader.OptionalColumnAtATimeReader {
 
         final int maxDoc;
         int doc = -1;
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java
index 795401dd3e3e3..de35a85e8878c 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java
@@ -1032,6 +1032,17 @@ public BytesRefsFromBinary(BinaryDocValues docValues) {
             super(docValues);
         }
 
+        @Override
+        public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset, boolean nullsFiltered) throws IOException {
+            if (docValues instanceof BlockLoader.OptionalColumnAtATimeReader direct) { // the doc values offer a bulk read path
+                BlockLoader.Block block = direct.tryRead(factory, docs, offset, nullsFiltered, null, false); // no toDouble, toInt=false
+                if (block != null) {
+                    return block;
+                }
+            } // a null block is treated as "bulk path declined"; fall through to the inherited read
+            return super.read(factory, docs, offset, nullsFiltered);
+        }
+
         @Override
         void read(int doc, BytesRefBuilder builder) throws IOException {
             if (false == docValues.advanceExact(doc)) {

From 32c1d7b633cbf751ce77676e56ac5a55a3eb13f8 Mon Sep 17 00:00:00 2001
From: Martijn van Groningen <martijn.v.groningen@gmail.com>
Date: Mon, 6 Oct 2025 13:38:27 +0200
Subject: [PATCH 06/15] test compile errors

---
 .../index/mapper/extras/MatchOnlyTextFieldTypeTests.java      | 4 ++--
 .../compute/lucene/read/ValuesSourceReaderOperatorTests.java  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java
index 41a0559453372..0042810241be7 100644
--- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java
+++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java
@@ -299,7 +299,7 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet
             mock(NamedAnalyzer.class),
             builder,
             true,
-            useBinaryDocValues
+            false
         );
 
         MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
@@ -348,7 +348,7 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet
             mock(NamedAnalyzer.class),
             builder,
             true,
-            useBinaryDocValues
+            false
         );
 
         MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java
index 2b0db9bfa484b..8c0e57e942d05 100644
--- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java
+++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java
@@ -1581,7 +1581,7 @@ private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) {
             Lucene.KEYWORD_ANALYZER,
             new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false),
             true, // TODO randomize - load from stored keyword fields if stored even in synthetic source
-            useBinaryDocValues
+            false
         );
     }
 

From 5985192241f9d4259fef8c38b1245d33cd52ca4b Mon Sep 17 00:00:00 2001
From: Martijn van Groningen <martijn.v.groningen@gmail.com>
Date: Mon, 6 Oct 2025 14:57:10 +0200
Subject: [PATCH 07/15] test compile errors

---
 .../lucene/read/ValueSourceReaderTypeConversionTests.java       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java
index 0636b6027b57a..79b14aa2c65ea 100644
--- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java
+++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java
@@ -1394,7 +1394,7 @@ private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) {
             Lucene.KEYWORD_ANALYZER,
             new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false),
             true, // TODO randomize - load from stored keyword fields if stored even in synthetic source
-            useBinaryDocValues
+            false
         );
     }
 

From e1b7d5057f19c7e1160cd87bac33e50cf74bb6cc Mon Sep 17 00:00:00 2001
From: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>
Date: Mon, 6 Oct 2025 13:04:48 +0000
Subject: [PATCH 08/15] [CI] Auto commit changes from spotless

---
 .../java/org/elasticsearch/index/mapper/KeywordFieldMapper.java | 2 --
 1 file changed, 2 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
index 3a65ea0fd3312..1ffa4a67bd248 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -53,7 +53,6 @@
 import org.elasticsearch.index.fielddata.FieldData;
 import org.elasticsearch.index.fielddata.FieldDataContext;
 import org.elasticsearch.index.fielddata.IndexFieldData;
-import org.elasticsearch.index.fielddata.IndexFieldDataCache;
 import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
 import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
 import org.elasticsearch.index.fielddata.plain.BinaryIndexFieldData;
@@ -61,7 +60,6 @@
 import org.elasticsearch.index.query.AutomatonQueryWithDescription;
 import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.index.similarity.SimilarityProvider;
-import org.elasticsearch.indices.breaker.CircuitBreakerService;
 import org.elasticsearch.script.BinaryDocValuesStringFieldScript;
 import org.elasticsearch.script.Script;
 import org.elasticsearch.script.ScriptCompiler;

From e45360f1db3b15371377a7ed64f4d548125da2be Mon Sep 17 00:00:00 2001
From: Parker Timmins <parker.timmins@elastic.co>
Date: Mon, 6 Oct 2025 14:24:46 -0500
Subject: [PATCH 09/15] Copy uncompressed addBinaryDocValues

---
 .../codec/tsdb/BinaryDVCompressionMode.java   |  29 ++
 .../es819/ES819TSDBDocValuesConsumer.java     | 304 ++++++++++++++++++
 .../tsdb/es819/ES819TSDBDocValuesFormat.java  |  11 +-
 3 files changed, 341 insertions(+), 3 deletions(-)
 create mode 100644 server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java

diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java
new file mode 100644
index 0000000000000..ddbd6d493c3b1
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.codec.tsdb;
+
+/**
+ * How the values of a binary doc values field are stored.
+ * <p>
+ * Each mode has a one-byte {@link #code}; the doc values consumer writes it to the field's metadata, and
+ * {@link #fromMode(byte)} maps it back to the constant.
+ */
+public enum BinaryDVCompressionMode {
+
+    /** Values are written without compression. */
+    NO_COMPRESS((byte) 0),
+    /** Values are compressed with LZ4. */
+    COMPRESSED_WITH_LZ4((byte) 1);
+
+    /** Byte that identifies this mode in persisted metadata. */
+    public final byte code;
+
+    BinaryDVCompressionMode(byte code) {
+        this.code = code;
+    }
+
+    /**
+     * Resolves a persisted code to its mode.
+     * <p>
+     * Public so that codec implementations in other packages, which are the ones persisting {@link #code}, can decode it.
+     *
+     * @param mode the code, as exposed by {@link #code}
+     * @return the matching mode
+     * @throws IllegalStateException if {@code mode} is not a known code
+     */
+    public static BinaryDVCompressionMode fromMode(byte mode) {
+        return switch (mode) {
+            case 0 -> NO_COMPRESS;
+            case 1 -> COMPRESSED_WITH_LZ4;
+            default -> throw new IllegalStateException("unknown compression mode [" + mode + "]");
+        };
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java
index 968e50eaf32be..51c6056ac70f4 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java
@@ -9,10 +9,12 @@
 
 package org.elasticsearch.index.codec.tsdb.es819;
 
+import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.codecs.lucene90.IndexedDISI;
 import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.DocValuesSkipIndexType;
 import org.apache.lucene.index.FieldInfo;
@@ -29,6 +31,7 @@
 import org.apache.lucene.store.ByteArrayDataOutput;
 import org.apache.lucene.store.ByteBuffersDataOutput;
 import org.apache.lucene.store.ByteBuffersIndexOutput;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexOutput;
@@ -41,8 +44,10 @@
 import org.apache.lucene.util.packed.DirectMonotonicWriter;
 import org.apache.lucene.util.packed.PackedInts;
 import org.elasticsearch.core.IOUtils;
+import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode;
 import org.elasticsearch.index.codec.tsdb.TSDBDocValuesEncoder;
 
+import java.io.Closeable;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -65,9 +70,12 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
     private final int minDocsPerOrdinalForOrdinalRangeEncoding;
     final boolean enableOptimizedMerge;
     private final int primarySortFieldNumber;
+    final SegmentWriteState state;
+    final BinaryDVCompressionMode binaryDVCompressionMode;
 
     ES819TSDBDocValuesConsumer(
         SegmentWriteState state,
+        BinaryDVCompressionMode binaryDVCompressionMode,
         int skipIndexIntervalSize,
         int minDocsPerOrdinalForOrdinalRangeEncoding,
         boolean enableOptimizedMerge,
@@ -76,6 +84,8 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
         String metaCodec,
         String metaExtension
     ) throws IOException {
+        this.state = state;
+        this.binaryDVCompressionMode = binaryDVCompressionMode;
         this.termsDictBuffer = new byte[1 << 14];
         this.dir = state.directory;
         this.minDocsPerOrdinalForOrdinalRangeEncoding = minDocsPerOrdinalForOrdinalRangeEncoding;
@@ -315,7 +325,143 @@ public void mergeBinaryField(FieldInfo mergeFieldInfo, MergeState mergeState) th
     public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
         meta.writeInt(field.number);
         meta.writeByte(ES819TSDBDocValuesFormat.BINARY);
+        meta.writeByte(binaryDVCompressionMode.code);
+        switch (binaryDVCompressionMode) {
+            case NO_COMPRESS -> doAddUncompressedBinary(field, valuesProducer);
+            case COMPRESSED_WITH_LZ4 -> doAddCompressedBinaryLZ4(field, valuesProducer);
+        }
+    }
+
+    public void doAddUncompressedBinary(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
+        if (valuesProducer instanceof TsdbDocValuesProducer tsdbValuesProducer && tsdbValuesProducer.mergeStats.supported()) {
+            final int numDocsWithField = tsdbValuesProducer.mergeStats.sumNumDocsWithField(); // stats are known before iterating
+            final int minLength = tsdbValuesProducer.mergeStats.minLength();
+            final int maxLength = tsdbValuesProducer.mergeStats.maxLength();
+
+            assert numDocsWithField <= maxDoc;
+
+            BinaryDocValues values = valuesProducer.getBinary(field);
+            long start = data.getFilePointer();
+            meta.writeLong(start); // dataOffset
+
+            OffsetsAccumulator offsetsAccumulator = null;
+            DISIAccumulator disiAccumulator = null;
+            try {
+                if (numDocsWithField > 0 && numDocsWithField < maxDoc) { // sparse field: doc ids must be recorded
+                    disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
+                }
+
+                assert maxLength >= minLength;
+                if (maxLength > minLength) { // variable-length values need an offsets index
+                    offsetsAccumulator = new OffsetsAccumulator(dir, context, data, numDocsWithField);
+                }
+
+                for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
+                    BytesRef v = values.binaryValue();
+                    data.writeBytes(v.bytes, v.offset, v.length);
+                    if (disiAccumulator != null) {
+                        disiAccumulator.addDocId(doc);
+                    }
+                    if (offsetsAccumulator != null) {
+                        offsetsAccumulator.addDoc(v.length);
+                    }
+                }
+                meta.writeLong(data.getFilePointer() - start); // dataLength
+
+                if (numDocsWithField == 0) {
+                    meta.writeLong(-2); // docsWithFieldOffset
+                    meta.writeLong(0L); // docsWithFieldLength
+                    meta.writeShort((short) -1); // jumpTableEntryCount
+                    meta.writeByte((byte) -1); // denseRankPower
+                } else if (numDocsWithField == maxDoc) {
+                    meta.writeLong(-1); // docsWithFieldOffset
+                    meta.writeLong(0L); // docsWithFieldLength
+                    meta.writeShort((short) -1); // jumpTableEntryCount
+                    meta.writeByte((byte) -1); // denseRankPower
+                } else {
+                    long offset = data.getFilePointer();
+                    meta.writeLong(offset); // docsWithFieldOffset
+                    final short jumpTableEntryCount = disiAccumulator.build(data);
+                    meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
+                    meta.writeShort(jumpTableEntryCount);
+                    meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
+                }
+
+                meta.writeInt(numDocsWithField);
+                meta.writeInt(minLength);
+                meta.writeInt(maxLength);
+                if (offsetsAccumulator != null) {
+                    offsetsAccumulator.build(meta, data);
+                }
+            } finally {
+                IOUtils.close(disiAccumulator, offsetsAccumulator);
+            }
+        } else { // no merge stats: gather them while writing, then re-iterate the values for doc ids and offsets
+            BinaryDocValues values = valuesProducer.getBinary(field);
+            long start = data.getFilePointer();
+            meta.writeLong(start); // dataOffset
+            int numDocsWithField = 0;
+            int minLength = Integer.MAX_VALUE; // stays MAX_VALUE when no doc has a value
+            int maxLength = 0;
+            for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
+                numDocsWithField++;
+                BytesRef v = values.binaryValue();
+                int length = v.length;
+                data.writeBytes(v.bytes, v.offset, v.length);
+                minLength = Math.min(length, minLength);
+                maxLength = Math.max(length, maxLength);
+            }
+            assert numDocsWithField <= maxDoc;
+            meta.writeLong(data.getFilePointer() - start); // dataLength
+
+            if (numDocsWithField == 0) {
+                meta.writeLong(-2); // docsWithFieldOffset
+                meta.writeLong(0L); // docsWithFieldLength
+                meta.writeShort((short) -1); // jumpTableEntryCount
+                meta.writeByte((byte) -1); // denseRankPower
+            } else if (numDocsWithField == maxDoc) {
+                meta.writeLong(-1); // docsWithFieldOffset
+                meta.writeLong(0L); // docsWithFieldLength
+                meta.writeShort((short) -1); // jumpTableEntryCount
+                meta.writeByte((byte) -1); // denseRankPower
+            } else {
+                long offset = data.getFilePointer();
+                meta.writeLong(offset); // docsWithFieldOffset
+                values = valuesProducer.getBinary(field); // fresh iterator for a second pass over the docs
+                final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
+                meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
+                meta.writeShort(jumpTableEntryCount);
+                meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
+            }
+
+            meta.writeInt(numDocsWithField);
+            meta.writeInt(minLength);
+            meta.writeInt(maxLength);
+            if (maxLength > minLength) {
+                start = data.getFilePointer();
+                meta.writeLong(start);
+                meta.writeVInt(ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT);
+
+                final DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance(
+                    meta,
+                    data,
+                    numDocsWithField + 1,
+                    ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
+                );
+                long addr = 0; // cumulative value length; each entry added is the start offset of the next value
+                writer.add(addr);
+                values = valuesProducer.getBinary(field); // fresh iterator to recompute the lengths
+                for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
+                    addr += values.binaryValue().length;
+                    writer.add(addr);
+                }
+                writer.finish();
+                meta.writeLong(data.getFilePointer() - start);
+            }
+        }
+    }
 
+    public void doAddCompressedBinaryLZ4(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
         if (valuesProducer instanceof TsdbDocValuesProducer tsdbValuesProducer && tsdbValuesProducer.mergeStats.supported()) {
             final int numDocsWithField = tsdbValuesProducer.mergeStats.sumNumDocsWithField();
             final int minLength = tsdbValuesProducer.mergeStats.minLength();
@@ -444,6 +590,164 @@ public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) th
         }
     }
 
+    static final int BINARY_BLOCK_SHIFT = 5;
+    static final int BINARY_DOCS_PER_COMPRESSED_BLOCK = 1 << BINARY_BLOCK_SHIFT;
+
+    private class CompressedBinaryBlockWriter implements Closeable { // buffers values and writes them to data as LZ4-compressed blocks
+        final LZ4.FastCompressionHashTable ht = new LZ4.FastCompressionHashTable();
+        int uncompressedBlockLength = 0; // bytes currently buffered in block
+        int maxUncompressedBlockLength = 0; // largest uncompressed block flushed so far; written to meta
+        int numDocsInCurrentBlock = 0;
+        final int[] docLengths = new int[BINARY_DOCS_PER_COMPRESSED_BLOCK]; // value length per slot of the current block
+        byte[] block = BytesRef.EMPTY_BYTES; // concatenated, uncompressed values of the current block
+        int totalChunks = 0; // number of blocks flushed to data
+        long maxPointer = 0; // data file pointer just past the most recently flushed block
+        final long blockAddressesStart; // data file pointer captured when this writer was created
+
+        final IndexOutput tempBinaryOffsets; // temp file that receives the on-disk size of every flushed block
+
+        CompressedBinaryBlockWriter() throws IOException {
+            tempBinaryOffsets = EndiannessReverserUtil.createTempOutput(
+                state.directory,
+                state.segmentInfo.name,
+                "binary_pointers",
+                state.context
+            );
+            boolean success = false;
+            try {
+                CodecUtil.writeHeader(
+                    tempBinaryOffsets,
+                    ES819TSDBDocValuesFormat.META_CODEC + "FilePointers",
+                    ES819TSDBDocValuesFormat.VERSION_CURRENT
+                );
+                blockAddressesStart = data.getFilePointer();
+                success = true;
+            } finally {
+                if (success == false) {
+                    IOUtils.closeWhileHandlingException(this); // self-close because constructor caller can't
+                }
+            }
+        }
+
+        void addDoc(int doc, BytesRef v) throws IOException { // doc is currently unused; values are appended in call order
+            docLengths[numDocsInCurrentBlock] = v.length;
+            block = ArrayUtil.grow(block, uncompressedBlockLength + v.length);
+            System.arraycopy(v.bytes, v.offset, block, uncompressedBlockLength, v.length);
+            uncompressedBlockLength += v.length;
+            numDocsInCurrentBlock++;
+            if (numDocsInCurrentBlock == BINARY_DOCS_PER_COMPRESSED_BLOCK) { // block is full
+                flushData();
+            }
+        }
+
+        private void flushData() throws IOException {
+            if (numDocsInCurrentBlock > 0) {
+                // Remember where this block starts; its on-disk size is appended to the temporary offsets file below
+                totalChunks++;
+                long thisBlockStartPointer = data.getFilePointer();
+
+                // Optimisation - check if all lengths are same (scans every slot; unused slots of a partial block hold 0)
+                boolean allLengthsSame = true;
+                for (int i = 1; i < BINARY_DOCS_PER_COMPRESSED_BLOCK; i++) {
+                    if (docLengths[i] != docLengths[i - 1]) {
+                        allLengthsSame = false;
+                        break;
+                    }
+                }
+                if (allLengthsSame) {
+                    // Only write one value shifted. Steal a bit to indicate all other lengths are the same
+                    int onlyOneLength = (docLengths[0] << 1) | 1;
+                    data.writeVInt(onlyOneLength);
+                } else {
+                    for (int i = 0; i < BINARY_DOCS_PER_COMPRESSED_BLOCK; i++) {
+                        if (i == 0) {
+                            // Write first value shifted and steal a bit to indicate other lengths are to follow
+                            int multipleLengths = (docLengths[0] << 1);
+                            data.writeVInt(multipleLengths);
+                        } else {
+                            data.writeVInt(docLengths[i]);
+                        }
+                    }
+                }
+                maxUncompressedBlockLength = Math.max(maxUncompressedBlockLength, uncompressedBlockLength);
+                LZ4.compress(block, 0, uncompressedBlockLength, EndiannessReverserUtil.wrapDataOutput(data), ht);
+                numDocsInCurrentBlock = 0;
+                // Ensure initialized with zeroes because full array is always written
+                Arrays.fill(docLengths, 0);
+                uncompressedBlockLength = 0;
+                maxPointer = data.getFilePointer();
+                tempBinaryOffsets.writeVLong(maxPointer - thisBlockStartPointer); // size of this block, lengths header included
+            }
+        }
+
+        void writeMetaData() throws IOException {
+            if (totalChunks == 0) { // nothing was flushed, so no block index is written
+                return;
+            }
+
+            long startDMW = data.getFilePointer();
+            meta.writeLong(startDMW);
+
+            meta.writeVInt(totalChunks);
+            meta.writeVInt(BINARY_BLOCK_SHIFT);
+            meta.writeVInt(maxUncompressedBlockLength);
+            meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
+
+            CodecUtil.writeFooter(tempBinaryOffsets);
+            IOUtils.close(tempBinaryOffsets);
+            // read the per-block sizes back from the temp file and write the block start addresses via DirectMonotonicWriter
+            try (
+                ChecksumIndexInput filePointersIn = EndiannessReverserUtil.openChecksumInput(
+                    state.directory,
+                    tempBinaryOffsets.getName(),
+                    IOContext.READONCE
+                )
+            ) {
+                CodecUtil.checkHeader(
+                    filePointersIn,
+                    ES819TSDBDocValuesFormat.META_CODEC + "FilePointers",
+                    ES819TSDBDocValuesFormat.VERSION_CURRENT,
+                    ES819TSDBDocValuesFormat.VERSION_CURRENT
+                );
+                Throwable priorE = null;
+                try {
+                    final DirectMonotonicWriter filePointers = DirectMonotonicWriter.getInstance(
+                        meta,
+                        data,
+                        totalChunks,
+                        ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
+                    );
+                    long fp = blockAddressesStart;
+                    for (int i = 0; i < totalChunks; ++i) {
+                        filePointers.add(fp);
+                        fp += filePointersIn.readVLong();
+                    }
+                    if (maxPointer < fp) { // summed sizes ran past the end of the last flushed block
+                        throw new CorruptIndexException(
+                            "File pointers don't add up (" + fp + " vs expected " + maxPointer + ")",
+                            filePointersIn
+                        );
+                    }
+                    filePointers.finish();
+                } catch (Throwable e) {
+                    priorE = e;
+                } finally {
+                    CodecUtil.checkFooter(filePointersIn, priorE);
+                }
+            }
+            // Write the length of the DMW block in the data
+            meta.writeLong(data.getFilePointer() - startDMW);
+        }
+
+        @Override
+        public void close() throws IOException { // closes the temp output and deletes the temp file
+            if (tempBinaryOffsets != null) {
+                IOUtils.close(tempBinaryOffsets, () -> state.directory.deleteFile(tempBinaryOffsets.getName()));
+            }
+        }
+    }
+    // END: Copied from LUCENE-9211
+
     @Override
     public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
         meta.writeInt(field.number);
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java
index fbdef488b8318..63029f382caf3 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java
@@ -14,6 +14,7 @@
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.elasticsearch.core.SuppressForbidden;
+import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode;
 
 import java.io.IOException;
 
@@ -47,7 +48,8 @@ public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValues
     static final byte SORTED_NUMERIC = 4;
 
     static final int VERSION_START = 0;
-    static final int VERSION_CURRENT = VERSION_START;
+    static final int VERSION_BINARY_DV_COMPRESSION = 1;
+    static final int VERSION_CURRENT = VERSION_BINARY_DV_COMPRESSION;
 
     static final int TERMS_DICT_BLOCK_LZ4_SHIFT = 6;
     static final int TERMS_DICT_BLOCK_LZ4_SIZE = 1 << TERMS_DICT_BLOCK_LZ4_SHIFT;
@@ -119,15 +121,17 @@ private static boolean getOptimizedMergeEnabledDefault() {
     final int skipIndexIntervalSize;
     final int minDocsPerOrdinalForRangeEncoding;
     private final boolean enableOptimizedMerge;
+    final BinaryDVCompressionMode binaryDVCompressionMode;
 
     /** Default constructor. */
     public ES819TSDBDocValuesFormat() {
-        this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT);
+        this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT, BinaryDVCompressionMode.COMPRESSED_WITH_LZ4);
     }
 
     /** Doc values fields format with specified skipIndexIntervalSize. */
-    public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForRangeEncoding, boolean enableOptimizedMerge) {
+    public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForRangeEncoding, boolean enableOptimizedMerge, BinaryDVCompressionMode binaryDVCompressionMode) {
         super(CODEC_NAME);
+        this.binaryDVCompressionMode = binaryDVCompressionMode;
         if (skipIndexIntervalSize < 2) {
             throw new IllegalArgumentException("skipIndexIntervalSize must be > 1, got [" + skipIndexIntervalSize + "]");
         }
@@ -140,6 +144,7 @@ public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinal
     public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
         return new ES819TSDBDocValuesConsumer(
             state,
+            binaryDVCompressionMode,
             skipIndexIntervalSize,
             minDocsPerOrdinalForRangeEncoding,
             enableOptimizedMerge,

From 392f71727b7a098da3277647276ab7db0e3f36f4 Mon Sep 17 00:00:00 2001
From: Parker Timmins <parker.timmins@elastic.co>
Date: Mon, 6 Oct 2025 14:27:50 -0500
Subject: [PATCH 10/15] Copy lz4 consumer code for non-optimized case

---
 .../es819/ES819TSDBDocValuesConsumer.java     | 99 ++++++++-----------
 1 file changed, 42 insertions(+), 57 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java
index 51c6056ac70f4..1b71a558ae775 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java
@@ -526,66 +526,51 @@ public void doAddCompressedBinaryLZ4(FieldInfo field, DocValuesProducer valuesPr
                 IOUtils.close(disiAccumulator, offsetsAccumulator);
             }
         } else {
-            BinaryDocValues values = valuesProducer.getBinary(field);
-            long start = data.getFilePointer();
-            meta.writeLong(start); // dataOffset
-            int numDocsWithField = 0;
-            int minLength = Integer.MAX_VALUE;
-            int maxLength = 0;
-            for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
-                numDocsWithField++;
-                BytesRef v = values.binaryValue();
-                int length = v.length;
-                data.writeBytes(v.bytes, v.offset, v.length);
-                minLength = Math.min(length, minLength);
-                maxLength = Math.max(length, maxLength);
-            }
-            assert numDocsWithField <= maxDoc;
-            meta.writeLong(data.getFilePointer() - start); // dataLength
-
-            if (numDocsWithField == 0) {
-                meta.writeLong(-2); // docsWithFieldOffset
-                meta.writeLong(0L); // docsWithFieldLength
-                meta.writeShort((short) -1); // jumpTableEntryCount
-                meta.writeByte((byte) -1); // denseRankPower
-            } else if (numDocsWithField == maxDoc) {
-                meta.writeLong(-1); // docsWithFieldOffset
-                meta.writeLong(0L); // docsWithFieldLength
-                meta.writeShort((short) -1); // jumpTableEntryCount
-                meta.writeByte((byte) -1); // denseRankPower
-            } else {
-                long offset = data.getFilePointer();
-                meta.writeLong(offset); // docsWithFieldOffset
-                values = valuesProducer.getBinary(field);
-                final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
-                meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
-                meta.writeShort(jumpTableEntryCount);
-                meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
-            }
+            try (CompressedBinaryBlockWriter blockWriter = new CompressedBinaryBlockWriter()) {
+                BinaryDocValues values = valuesProducer.getBinary(field);
+                long start = data.getFilePointer();
+                meta.writeLong(start); // dataOffset
+                int numDocsWithField = 0;
+                int minLength = Integer.MAX_VALUE;
+                int maxLength = 0;
+                for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
+                    numDocsWithField++;
+                    BytesRef v = values.binaryValue();
+                    blockWriter.addDoc(doc, v);
+                    int length = v.length;
+                    minLength = Math.min(length, minLength);
+                    maxLength = Math.max(length, maxLength);
+                }
+                blockWriter.flushData();
 
-            meta.writeInt(numDocsWithField);
-            meta.writeInt(minLength);
-            meta.writeInt(maxLength);
-            if (maxLength > minLength) {
-                start = data.getFilePointer();
-                meta.writeLong(start);
-                meta.writeVInt(ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT);
+                assert numDocsWithField <= maxDoc;
+                meta.writeLong(data.getFilePointer() - start); // dataLength
 
-                final DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance(
-                    meta,
-                    data,
-                    numDocsWithField + 1,
-                    ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
-                );
-                long addr = 0;
-                writer.add(addr);
-                values = valuesProducer.getBinary(field);
-                for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
-                    addr += values.binaryValue().length;
-                    writer.add(addr);
+                if (numDocsWithField == 0) {
+                    meta.writeLong(-2); // docsWithFieldOffset
+                    meta.writeLong(0L); // docsWithFieldLength
+                    meta.writeShort((short) -1); // jumpTableEntryCount
+                    meta.writeByte((byte) -1); // denseRankPower
+                } else if (numDocsWithField == maxDoc) {
+                    meta.writeLong(-1); // docsWithFieldOffset
+                    meta.writeLong(0L); // docsWithFieldLength
+                    meta.writeShort((short) -1); // jumpTableEntryCount
+                    meta.writeByte((byte) -1); // denseRankPower
+                } else {
+                    long offset = data.getFilePointer();
+                    meta.writeLong(offset); // docsWithFieldOffset
+                    values = valuesProducer.getBinary(field);
+                    final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
+                    meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
+                    meta.writeShort(jumpTableEntryCount);
+                    meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
                 }
-                writer.finish();
-                meta.writeLong(data.getFilePointer() - start);
+
+                meta.writeInt(numDocsWithField);
+                meta.writeInt(minLength);
+                meta.writeInt(maxLength);
+
+                blockWriter.writeMetaData();
             }
         }
     }

From d3760fdf5ef310d88931c9b3a7ab5d29b19e5de3 Mon Sep 17 00:00:00 2001
From: Parker Timmins <parker.timmins@elastic.co>
Date: Mon, 6 Oct 2025 14:45:35 -0500
Subject: [PATCH 11/15] Copy over remainder of compression code, dont use
 optimized binary doc values

---
 .../codec/tsdb/BinaryDVCompressionMode.java   |   2 +-
 .../es819/ES819TSDBDocValuesConsumer.java     | 139 ++++-----------
 .../es819/ES819TSDBDocValuesProducer.java     | 162 +++++++++++++++++-
 .../codec/tsdb/DocValuesCodecDuelTests.java   |   8 +-
 .../es819/ES819TSDBDocValuesFormatTests.java  |   5 +-
 5 files changed, 206 insertions(+), 110 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java
index ddbd6d493c3b1..ce0f365eb529e 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java
@@ -19,7 +19,7 @@ public enum BinaryDVCompressionMode {
         this.code = code;
     }
 
-    static BinaryDVCompressionMode fromMode(byte mode) {
+    public static BinaryDVCompressionMode fromMode(byte mode) {
         return switch (mode) {
             case 0 -> NO_COMPRESS;
             case 1 -> COMPRESSED_WITH_LZ4;
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java
index 1b71a558ae775..fce05c94a4d3e 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java
@@ -462,116 +462,51 @@ public void doAddUncompressedBinary(FieldInfo field, DocValuesProducer valuesPro
     }
 
     public void doAddCompressedBinaryLZ4(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
-        if (valuesProducer instanceof TsdbDocValuesProducer tsdbValuesProducer && tsdbValuesProducer.mergeStats.supported()) {
-            final int numDocsWithField = tsdbValuesProducer.mergeStats.sumNumDocsWithField();
-            final int minLength = tsdbValuesProducer.mergeStats.minLength();
-            final int maxLength = tsdbValuesProducer.mergeStats.maxLength();
-
-            assert numDocsWithField <= maxDoc;
-
+        try (CompressedBinaryBlockWriter blockWriter = new CompressedBinaryBlockWriter()) {
             BinaryDocValues values = valuesProducer.getBinary(field);
             long start = data.getFilePointer();
             meta.writeLong(start); // dataOffset
-
-            OffsetsAccumulator offsetsAccumulator = null;
-            DISIAccumulator disiAccumulator = null;
-            try {
-                if (numDocsWithField > 0 && numDocsWithField < maxDoc) {
-                    disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
-                }
-
-                assert maxLength >= minLength;
-                if (maxLength > minLength) {
-                    offsetsAccumulator = new OffsetsAccumulator(dir, context, data, numDocsWithField);
-                }
-
-                for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
-                    BytesRef v = values.binaryValue();
-                    data.writeBytes(v.bytes, v.offset, v.length);
-                    if (disiAccumulator != null) {
-                        disiAccumulator.addDocId(doc);
-                    }
-                    if (offsetsAccumulator != null) {
-                        offsetsAccumulator.addDoc(v.length);
-                    }
-                }
-                meta.writeLong(data.getFilePointer() - start); // dataLength
-
-                if (numDocsWithField == 0) {
-                    meta.writeLong(-2); // docsWithFieldOffset
-                    meta.writeLong(0L); // docsWithFieldLength
-                    meta.writeShort((short) -1); // jumpTableEntryCount
-                    meta.writeByte((byte) -1); // denseRankPower
-                } else if (numDocsWithField == maxDoc) {
-                    meta.writeLong(-1); // docsWithFieldOffset
-                    meta.writeLong(0L); // docsWithFieldLength
-                    meta.writeShort((short) -1); // jumpTableEntryCount
-                    meta.writeByte((byte) -1); // denseRankPower
-                } else {
-                    long offset = data.getFilePointer();
-                    meta.writeLong(offset); // docsWithFieldOffset
-                    final short jumpTableEntryCount = disiAccumulator.build(data);
-                    meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
-                    meta.writeShort(jumpTableEntryCount);
-                    meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
-                }
-
-                meta.writeInt(numDocsWithField);
-                meta.writeInt(minLength);
-                meta.writeInt(maxLength);
-                if (offsetsAccumulator != null) {
-                    offsetsAccumulator.build(meta, data);
-                }
-            } finally {
-                IOUtils.close(disiAccumulator, offsetsAccumulator);
+            int numDocsWithField = 0;
+            int minLength = Integer.MAX_VALUE;
+            int maxLength = 0;
+            for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
+                numDocsWithField++;
+                BytesRef v = values.binaryValue();
+                blockWriter.addDoc(doc, v);
+                int length = v.length;
+                minLength = Math.min(length, minLength);
+                maxLength = Math.max(length, maxLength);
             }
-        } else {
-            try (CompressedBinaryBlockWriter blockWriter = new CompressedBinaryBlockWriter()) {
-                BinaryDocValues values = valuesProducer.getBinary(field);
-                long start = data.getFilePointer();
-                meta.writeLong(start); // dataOffset
-                int numDocsWithField = 0;
-                int minLength = Integer.MAX_VALUE;
-                int maxLength = 0;
-                for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
-                    numDocsWithField++;
-                    BytesRef v = values.binaryValue();
-                    blockWriter.addDoc(doc, v);
-                    int length = v.length;
-                    minLength = Math.min(length, minLength);
-                    maxLength = Math.max(length, maxLength);
-                }
-                blockWriter.flushData();
+            blockWriter.flushData();
 
-                assert numDocsWithField <= maxDoc;
-                meta.writeLong(data.getFilePointer() - start); // dataLength
+            assert numDocsWithField <= maxDoc;
+            meta.writeLong(data.getFilePointer() - start); // dataLength
 
-                if (numDocsWithField == 0) {
-                    meta.writeLong(-2); // docsWithFieldOffset
-                    meta.writeLong(0L); // docsWithFieldLength
-                    meta.writeShort((short) -1); // jumpTableEntryCount
-                    meta.writeByte((byte) -1); // denseRankPower
-                } else if (numDocsWithField == maxDoc) {
-                    meta.writeLong(-1); // docsWithFieldOffset
-                    meta.writeLong(0L); // docsWithFieldLength
-                    meta.writeShort((short) -1); // jumpTableEntryCount
-                    meta.writeByte((byte) -1); // denseRankPower
-                } else {
-                    long offset = data.getFilePointer();
-                    meta.writeLong(offset); // docsWithFieldOffset
-                    values = valuesProducer.getBinary(field);
-                    final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
-                    meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
-                    meta.writeShort(jumpTableEntryCount);
-                    meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
-                }
+            if (numDocsWithField == 0) {
+                meta.writeLong(-2); // docsWithFieldOffset
+                meta.writeLong(0L); // docsWithFieldLength
+                meta.writeShort((short) -1); // jumpTableEntryCount
+                meta.writeByte((byte) -1); // denseRankPower
+            } else if (numDocsWithField == maxDoc) {
+                meta.writeLong(-1); // docsWithFieldOffset
+                meta.writeLong(0L); // docsWithFieldLength
+                meta.writeShort((short) -1); // jumpTableEntryCount
+                meta.writeByte((byte) -1); // denseRankPower
+            } else {
+                long offset = data.getFilePointer();
+                meta.writeLong(offset); // docsWithFieldOffset
+                values = valuesProducer.getBinary(field);
+                final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
+                meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
+                meta.writeShort(jumpTableEntryCount);
+                meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
+            }
 
-                meta.writeInt(numDocsWithField);
-                meta.writeInt(minLength);
-                meta.writeInt(maxLength);
+            meta.writeInt(numDocsWithField);
+            meta.writeInt(minLength);
+            meta.writeInt(maxLength);
 
-                blockWriter.writeMetaData();
-            }
+            blockWriter.writeMetaData();
         }
     }
 
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
index e26f8d04965ae..f20dc498d346f 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
@@ -45,12 +45,15 @@
 import org.apache.lucene.util.packed.PackedInts;
 import org.elasticsearch.core.Assertions;
 import org.elasticsearch.core.IOUtils;
+import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode;
 import org.elasticsearch.index.codec.tsdb.TSDBDocValuesEncoder;
 import org.elasticsearch.index.mapper.BlockDocValuesReader;
 import org.elasticsearch.index.mapper.BlockLoader;
 
 import java.io.IOException;
 
+import static org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode.COMPRESSED_WITH_LZ4;
+import static org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode.NO_COMPRESS;
 import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_JUMP_LENGTH_PER_LEVEL;
 import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_MAX_LEVEL;
 import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT;
@@ -97,7 +100,7 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
                     state.segmentSuffix
                 );
 
-                readFields(in, state.fieldInfos);
+                readFields(in, state.fieldInfos, version);
 
             } catch (Throwable exception) {
                 priorE = exception;
@@ -193,6 +196,13 @@ public BinaryDocValues getBinary(FieldInfo field) throws IOException {
             return DocValues.emptyBinary();
         }
 
+        return switch (entry.compression) {
+            case NO_COMPRESS -> getUncompressedBinary(entry);
+            case COMPRESSED_WITH_LZ4 -> getCompressedBinary(entry);
+        };
+    }
+
+    public BinaryDocValues getUncompressedBinary(BinaryEntry entry) throws IOException {
         final RandomAccessInput bytesSlice = data.randomAccessSlice(entry.dataOffset, entry.dataLength);
 
         if (entry.docsWithFieldOffset == -1) {
@@ -309,6 +319,132 @@ public BytesRef binaryValue() throws IOException {
         }
     }
 
+    // START: Copied from LUCENE-9211
+    private BinaryDocValues getCompressedBinary(BinaryEntry entry) throws IOException {
+        if (entry.docsWithFieldOffset == -1) {
+            // dense
+            final RandomAccessInput addressesData = this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
+            final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData);
+            return new DenseBinaryDocValues(maxDoc) {
+                final BinaryDecoder decoder = new BinaryDecoder(
+                    addresses,
+                    data.clone(),
+                    entry.maxUncompressedChunkSize,
+                    entry.docsPerChunkShift
+                );
+
+                @Override
+                public BytesRef binaryValue() throws IOException {
+                    return decoder.decode(doc);
+                }
+            };
+        } else {
+            // sparse
+            final IndexedDISI disi = new IndexedDISI(
+                data,
+                entry.docsWithFieldOffset,
+                entry.docsWithFieldLength,
+                entry.jumpTableEntryCount,
+                entry.denseRankPower,
+                entry.numDocsWithField
+            );
+            final RandomAccessInput addressesData = this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
+            final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData);
+            return new SparseBinaryDocValues(disi) {
+                final BinaryDecoder decoder = new BinaryDecoder(
+                    addresses,
+                    data.clone(),
+                    entry.maxUncompressedChunkSize,
+                    entry.docsPerChunkShift
+                );
+
+                @Override
+                public BytesRef binaryValue() throws IOException {
+                    return decoder.decode(disi.index());
+                }
+            };
+        }
+
+    }
+
+    // Decompresses blocks of binary values to retrieve content
+    static final class BinaryDecoder {
+
+        private final LongValues addresses;
+        private final IndexInput compressedData;
+        // Cache of last uncompressed block
+        private long lastBlockId = -1;
+        private final int[] uncompressedDocStarts;
+        private final byte[] uncompressedBlock;
+        private final BytesRef uncompressedBytesRef;
+        private final int docsPerChunk;
+        private final int docsPerChunkShift;
+
+        BinaryDecoder(LongValues addresses, IndexInput compressedData, int biggestUncompressedBlockSize, int docsPerChunkShift) {
+            super();
+            this.addresses = addresses;
+            this.compressedData = compressedData;
+            // pre-allocate a byte array large enough for the biggest uncompressed block needed.
+            this.uncompressedBlock = new byte[biggestUncompressedBlockSize];
+            uncompressedBytesRef = new BytesRef(uncompressedBlock);
+            this.docsPerChunk = 1 << docsPerChunkShift;
+            this.docsPerChunkShift = docsPerChunkShift;
+            uncompressedDocStarts = new int[docsPerChunk + 1];
+        }
+
+        BytesRef decode(int docNumber) throws IOException {
+            int blockId = docNumber >> docsPerChunkShift;
+            int docInBlockId = docNumber % docsPerChunk;
+            assert docInBlockId < docsPerChunk;
+
+            // already read and uncompressed?
+            if (blockId != lastBlockId) {
+                lastBlockId = blockId;
+                long blockStartOffset = addresses.get(blockId);
+                compressedData.seek(blockStartOffset);
+
+                int uncompressedBlockLength = 0;
+
+                int onlyLength = -1;
+                for (int i = 0; i < docsPerChunk; i++) {
+                    if (i == 0) {
+                        // The first length value is special. It is shifted and has a bit to denote if
+                        // all other values are the same length
+                        int lengthPlusSameInd = compressedData.readVInt();
+                        int sameIndicator = lengthPlusSameInd & 1;
+                        int firstValLength = lengthPlusSameInd >>> 1;
+                        if (sameIndicator == 1) {
+                            onlyLength = firstValLength;
+                        }
+                        uncompressedBlockLength += firstValLength;
+                    } else {
+                        if (onlyLength == -1) {
+                            // Various lengths are stored - read each from disk
+                            uncompressedBlockLength += compressedData.readVInt();
+                        } else {
+                            // Only one length
+                            uncompressedBlockLength += onlyLength;
+                        }
+                    }
+                    uncompressedDocStarts[i + 1] = uncompressedBlockLength;
+                }
+
+                if (uncompressedBlockLength == 0) {
+                    uncompressedBytesRef.offset = 0;
+                    uncompressedBytesRef.length = 0;
+                    return uncompressedBytesRef;
+                }
+
+                assert uncompressedBlockLength <= uncompressedBlock.length;
+                LZ4.decompress(EndiannessReverserUtil.wrapDataInput(compressedData), uncompressedBlockLength, uncompressedBlock, 0);
+            }
+
+            uncompressedBytesRef.offset = uncompressedDocStarts[docInBlockId];
+            uncompressedBytesRef.length = uncompressedDocStarts[docInBlockId + 1] - uncompressedBytesRef.offset;
+            return uncompressedBytesRef;
+        }
+    }
+    // END: Copied fom LUCENE-9211
     private abstract static class DenseBinaryDocValues extends BinaryDocValues implements BlockLoader.OptionalColumnAtATimeReader {
 
         final int maxDoc;
@@ -1129,7 +1265,7 @@ static int primarySortFieldNumber(SegmentInfo segmentInfo, FieldInfos fieldInfos
         return -1;
     }
 
-    private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
+    private void readFields(IndexInput meta, FieldInfos infos, int version) throws IOException {
         for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
             FieldInfo info = infos.fieldInfo(fieldNumber);
             if (info == null) {
@@ -1142,7 +1278,7 @@ private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
             if (type == ES819TSDBDocValuesFormat.NUMERIC) {
                 numerics.put(info.number, readNumeric(meta));
             } else if (type == ES819TSDBDocValuesFormat.BINARY) {
-                binaries.put(info.number, readBinary(meta));
+                binaries.put(info.number, readBinary(meta, version));
             } else if (type == ES819TSDBDocValuesFormat.SORTED) {
                 sorted.put(info.number, readSorted(meta));
             } else if (type == ES819TSDBDocValuesFormat.SORTED_SET) {
@@ -1204,8 +1340,15 @@ private static void readNumeric(IndexInput meta, NumericEntry entry) throws IOEx
         entry.denseRankPower = meta.readByte();
     }
 
-    private BinaryEntry readBinary(IndexInput meta) throws IOException {
-        final BinaryEntry entry = new BinaryEntry();
+    private BinaryEntry readBinary(IndexInput meta, int version) throws IOException {
+        final BinaryDVCompressionMode compression;
+        if (version >= ES819TSDBDocValuesFormat.VERSION_BINARY_DV_COMPRESSION) {
+            compression = BinaryDVCompressionMode.fromMode(meta.readByte());
+        } else {
+            compression = BinaryDVCompressionMode.NO_COMPRESS;
+        }
+        final BinaryEntry entry = new BinaryEntry(compression);
+
         entry.dataOffset = meta.readLong();
         entry.dataLength = meta.readLong();
         entry.docsWithFieldOffset = meta.readLong();
@@ -1888,6 +2031,8 @@ static class NumericEntry {
     }
 
     static class BinaryEntry {
+        final BinaryDVCompressionMode compression;
+
         long dataOffset;
         long dataLength;
         long docsWithFieldOffset;
@@ -1899,7 +2044,14 @@ static class BinaryEntry {
         int maxLength;
         long addressesOffset;
         long addressesLength;
+        // only set when binary doc values are stored compressed
+        int maxUncompressedChunkSize;
+        int docsPerChunkShift;
         DirectMonotonicReader.Meta addressesMeta;
+
+        BinaryEntry(BinaryDVCompressionMode compression) {
+            this.compression = compression;
+        }
     }
 
     static class SortedNumericEntry extends NumericEntry {
diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java
index ee9351ed51b97..0e10ae7cdbf22 100644
--- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java
@@ -44,6 +44,11 @@ public class DocValuesCodecDuelTests extends ESTestCase {
     private static final String FIELD_4 = "number_field_4";
     private static final String FIELD_5 = "binary_field_5";
 
+    public static BinaryDVCompressionMode randomCompressionMode() {
+        BinaryDVCompressionMode[] modes = BinaryDVCompressionMode.values();
+        return modes[random().nextInt(modes.length)];
+    }
+
     @SuppressWarnings("checkstyle:LineLength")
     public void testDuel() throws IOException {
         try (var baselineDirectory = newDirectory(); var contenderDirectory = newDirectory()) {
@@ -61,7 +66,8 @@ public void testDuel() throws IOException {
                     ? new ES819TSDBDocValuesFormat(
                         ESTestCase.randomIntBetween(1, 4096),
                         ESTestCase.randomIntBetween(1, 512),
-                        random().nextBoolean()
+                        random().nextBoolean(),
+                        randomCompressionMode()
                     )
                     : new TestES87TSDBDocValuesFormat();
 
diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java
index 003124ab4b6f4..ee638c7697a84 100644
--- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java
@@ -42,6 +42,7 @@
 import org.elasticsearch.common.util.CollectionUtils;
 import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec;
 import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec;
+import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode;
 import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests;
 import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseDenseNumericValues;
 import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseSortedDocValues;
@@ -70,10 +71,12 @@ public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests
 
     private final Codec codec = new Elasticsearch92Lucene103Codec() {
 
+        BinaryDVCompressionMode[] modes = BinaryDVCompressionMode.values();
         final ES819TSDBDocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat(
             ESTestCase.randomIntBetween(2, 4096),
             ESTestCase.randomIntBetween(1, 512),
-            random().nextBoolean()
+            random().nextBoolean(),
+            modes[random().nextInt(modes.length)]
         );
 
         @Override

From 57c2f248a6200f754356cff38ffbfa5e190e2713 Mon Sep 17 00:00:00 2001
From: Parker Timmins <parker.timmins@elastic.co>
Date: Mon, 6 Oct 2025 15:01:24 -0500
Subject: [PATCH 12/15] Fix issue with readBinary

---
 .../es819/ES819TSDBDocValuesProducer.java     | 36 ++++++++++++++-----
 .../codec/tsdb/DocValuesCodecDuelTests.java   |  8 ++---
 .../codec/tsdb/TsdbDocValueBwcTests.java      |  4 ++-
 .../es819/ES819TSDBDocValuesFormatTests.java  |  9 +++--
 ...ValuesFormatVariableSkipIntervalTests.java |  4 +--
 5 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
index f20dc498d346f..5ee52f7a4ab8f 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
@@ -9,6 +9,7 @@
 
 package org.elasticsearch.index.codec.tsdb.es819;
 
+import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.codecs.lucene90.IndexedDISI;
@@ -337,6 +338,11 @@ private BinaryDocValues getCompressedBinary(BinaryEntry entry) throws IOExceptio
                 public BytesRef binaryValue() throws IOException {
                     return decoder.decode(doc);
                 }
+
+                @Override
+                public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset, boolean nullsFiltered, BlockDocValuesReader.ToDouble toDouble, boolean toInt) throws IOException {
+                    return null;
+                }
             };
         } else {
             // sparse
@@ -1358,15 +1364,27 @@ private BinaryEntry readBinary(IndexInput meta, int version) throws IOException
         entry.numDocsWithField = meta.readInt();
         entry.minLength = meta.readInt();
         entry.maxLength = meta.readInt();
-        if (entry.minLength < entry.maxLength) {
-            entry.addressesOffset = meta.readLong();
-
-            // Old count of uncompressed addresses
-            long numAddresses = entry.numDocsWithField + 1L;
-
-            final int blockShift = meta.readVInt();
-            entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, numAddresses, blockShift);
-            entry.addressesLength = meta.readLong();
+        if (compression == BinaryDVCompressionMode.NO_COMPRESS) {
+            if (entry.minLength < entry.maxLength) {
+                entry.addressesOffset = meta.readLong();
+                // Old count of uncompressed addresses
+                long numAddresses = entry.numDocsWithField + 1L;
+                final int blockShift = meta.readVInt();
+                entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, numAddresses, blockShift);
+                entry.addressesLength = meta.readLong();
+            }
+        } else {
+            if (entry.numDocsWithField > 0 || entry.minLength < entry.maxLength) {
+                entry.addressesOffset = meta.readLong();
+                // New count of compressed addresses - the number of compressed blocks
+                int numCompressedChunks = meta.readVInt();
+                entry.docsPerChunkShift = meta.readVInt();
+                entry.maxUncompressedChunkSize = meta.readVInt();
+
+                final int blockShift = meta.readVInt();
+                entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, numCompressedChunks, blockShift);
+                entry.addressesLength = meta.readLong();
+            }
         }
         return entry;
     }
diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java
index 0e10ae7cdbf22..1efe9c3f0bf36 100644
--- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java
@@ -29,6 +29,7 @@
 import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec;
 import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests.TestES87TSDBDocValuesFormat;
 import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat;
+import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormatTests;
 import org.elasticsearch.test.ESTestCase;
 
 import java.io.IOException;
@@ -44,11 +45,6 @@ public class DocValuesCodecDuelTests extends ESTestCase {
     private static final String FIELD_4 = "number_field_4";
     private static final String FIELD_5 = "binary_field_5";
 
-    public static BinaryDVCompressionMode randomCompressionMode() {
-        BinaryDVCompressionMode[] modes = BinaryDVCompressionMode.values();
-        return modes[random().nextInt(modes.length)];
-    }
-
     @SuppressWarnings("checkstyle:LineLength")
     public void testDuel() throws IOException {
         try (var baselineDirectory = newDirectory(); var contenderDirectory = newDirectory()) {
@@ -67,7 +63,7 @@ public void testDuel() throws IOException {
                         ESTestCase.randomIntBetween(1, 4096),
                         ESTestCase.randomIntBetween(1, 512),
                         random().nextBoolean(),
-                        randomCompressionMode()
+                        ES819TSDBDocValuesFormatTests.randomCompressionMode()
                     )
                     : new TestES87TSDBDocValuesFormat();
 
diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java
index d2c8aae601977..23f85da450431 100644
--- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java
@@ -42,6 +42,7 @@
 import org.elasticsearch.index.codec.perfield.XPerFieldDocValuesFormat;
 import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests.TestES87TSDBDocValuesFormat;
 import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat;
+import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormatTests;
 import org.elasticsearch.test.ESTestCase;
 import org.hamcrest.Matchers;
 
@@ -291,7 +292,8 @@ public void testEncodeOrdinalRange() throws IOException {
                         new ES819TSDBDocValuesFormat(
                             random().nextInt(16, 128),
                             nextOrdinalRangeThreshold.getAsInt(),
-                            random().nextBoolean()
+                            random().nextBoolean(),
+                            ES819TSDBDocValuesFormatTests.randomCompressionMode()
                         )
                     )
                 );
diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java
index ee638c7697a84..eb4755cd9b0f3 100644
--- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java
@@ -43,6 +43,7 @@
 import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec;
 import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec;
 import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode;
+import org.elasticsearch.index.codec.tsdb.DocValuesCodecDuelTests;
 import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests;
 import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseDenseNumericValues;
 import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseSortedDocValues;
@@ -69,14 +70,18 @@
 
 public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests {
 
+    public static BinaryDVCompressionMode randomCompressionMode() {
+        BinaryDVCompressionMode[] modes = BinaryDVCompressionMode.values();
+        return modes[random().nextInt(modes.length)];
+    }
+
     private final Codec codec = new Elasticsearch92Lucene103Codec() {
 
-        BinaryDVCompressionMode[] modes = BinaryDVCompressionMode.values();
         final ES819TSDBDocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat(
             ESTestCase.randomIntBetween(2, 4096),
             ESTestCase.randomIntBetween(1, 512),
             random().nextBoolean(),
-            modes[random().nextInt(modes.length)]
+            randomCompressionMode()
         );
 
         @Override
diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java
index 247b75f2977b5..0ecb042b3e215 100644
--- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java
@@ -19,14 +19,14 @@ public class ES819TSDBDocValuesFormatVariableSkipIntervalTests extends ES87TSDBD
     protected Codec getCodec() {
         // small interval size to test with many intervals
         return TestUtil.alwaysDocValuesFormat(
-            new ES819TSDBDocValuesFormat(random().nextInt(4, 16), random().nextInt(1, 32), random().nextBoolean())
+            new ES819TSDBDocValuesFormat(random().nextInt(4, 16), random().nextInt(1, 32), random().nextBoolean(), ES819TSDBDocValuesFormatTests.randomCompressionMode())
         );
     }
 
     public void testSkipIndexIntervalSize() {
         IllegalArgumentException ex = expectThrows(
             IllegalArgumentException.class,
-            () -> new ES819TSDBDocValuesFormat(random().nextInt(Integer.MIN_VALUE, 2), random().nextInt(1, 32), random().nextBoolean())
+            () -> new ES819TSDBDocValuesFormat(random().nextInt(Integer.MIN_VALUE, 2), random().nextInt(1, 32), random().nextBoolean(), ES819TSDBDocValuesFormatTests.randomCompressionMode())
         );
         assertTrue(ex.getMessage().contains("skipIndexIntervalSize must be > 1"));
     }

From 48e15a881118752478e05fa47206baea9bfee588 Mon Sep 17 00:00:00 2001
From: Parker Timmins <parker.timmins@elastic.co>
Date: Mon, 6 Oct 2025 15:06:18 -0500
Subject: [PATCH 13/15] Restrict compression to keyword binary doc values

---
 .../index/codec/PerFieldFormatSupplier.java          | 12 +++++++++++-
 .../codec/tsdb/es819/ES819TSDBDocValuesFormat.java   |  4 ++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java
index 2ed1aa6c9f17f..6d4cc5cc1276a 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java
@@ -20,9 +20,11 @@
 import org.elasticsearch.index.IndexVersions;
 import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat;
 import org.elasticsearch.index.codec.postings.ES812PostingsFormat;
+import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode;
 import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat;
 import org.elasticsearch.index.mapper.CompletionFieldMapper;
 import org.elasticsearch.index.mapper.IdFieldMapper;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.Mapper;
 import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.mapper.SeqNoFieldMapper;
@@ -57,7 +59,8 @@ public class PerFieldFormatSupplier {
 
     private static final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat();
     private static final KnnVectorsFormat knnVectorsFormat = new Lucene99HnswVectorsFormat();
-    private static final ES819TSDBDocValuesFormat tsdbDocValuesFormat = new ES819TSDBDocValuesFormat();
+    private static final ES819TSDBDocValuesFormat tsdbDocValuesFormat = new ES819TSDBDocValuesFormat(BinaryDVCompressionMode.NO_COMPRESS);
+    private static final DocValuesFormat compressedBinaryDocValuesFormat = new ES819TSDBDocValuesFormat(BinaryDVCompressionMode.COMPRESSED_WITH_LZ4);
     private static final ES812PostingsFormat es812PostingsFormat = new ES812PostingsFormat();
     private static final PostingsFormat completionPostingsFormat = PostingsFormat.forName("Completion101");
 
@@ -127,6 +130,13 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
     }
 
     public DocValuesFormat getDocValuesFormatForField(String field) {
+        if (mapperService != null) {
+            Mapper mapper = mapperService.mappingLookup().getMapper(field);
+            if (mapper != null && KeywordFieldMapper.CONTENT_TYPE.equals(mapper.typeName())) {
+                return compressedBinaryDocValuesFormat;
+            }
+        }
+
         if (useTSDBDocValuesFormat(field)) {
             return tsdbDocValuesFormat;
         }
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java
index 63029f382caf3..fd776c8c73e74 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java
@@ -128,6 +128,10 @@ public ES819TSDBDocValuesFormat() {
         this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT, BinaryDVCompressionMode.COMPRESSED_WITH_LZ4);
     }
 
+    public ES819TSDBDocValuesFormat(BinaryDVCompressionMode binaryDVCompressionMode) {
+        this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT, binaryDVCompressionMode);
+    }
+
     /** Doc values fields format with specified skipIndexIntervalSize. */
     public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForRangeEncoding, boolean enableOptimizedMerge, BinaryDVCompressionMode binaryDVCompressionMode) {
         super(CODEC_NAME);

From 1a48f0414b3f3b1d3f2e0086e7ce8cf2f66ae1d3 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>
Date: Tue, 7 Oct 2025 14:09:01 +0000
Subject: [PATCH 14/15] [CI] Auto commit changes from spotless

---
 .../index/codec/PerFieldFormatSupplier.java   |  4 +++-
 .../tsdb/es819/ES819TSDBDocValuesFormat.java  | 21 ++++++++++++++++---
 .../es819/ES819TSDBDocValuesProducer.java     | 10 ++++++++-
 .../es819/ES819TSDBDocValuesFormatTests.java  |  1 -
 ...ValuesFormatVariableSkipIntervalTests.java | 14 +++++++++++--
 5 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java
index 6d4cc5cc1276a..b0b6c72b3ef81 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java
@@ -60,7 +60,9 @@ public class PerFieldFormatSupplier {
     private static final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat();
     private static final KnnVectorsFormat knnVectorsFormat = new Lucene99HnswVectorsFormat();
     private static final ES819TSDBDocValuesFormat tsdbDocValuesFormat = new ES819TSDBDocValuesFormat(BinaryDVCompressionMode.NO_COMPRESS);
-    private static final DocValuesFormat compressedBinaryDocValuesFormat = new ES819TSDBDocValuesFormat(BinaryDVCompressionMode.COMPRESSED_WITH_LZ4);
+    private static final DocValuesFormat compressedBinaryDocValuesFormat = new ES819TSDBDocValuesFormat(
+        BinaryDVCompressionMode.COMPRESSED_WITH_LZ4
+    );
     private static final ES812PostingsFormat es812PostingsFormat = new ES812PostingsFormat();
     private static final PostingsFormat completionPostingsFormat = PostingsFormat.forName("Completion101");
 
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java
index fd776c8c73e74..59eadd7825ec2 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java
@@ -125,15 +125,30 @@ private static boolean getOptimizedMergeEnabledDefault() {
 
     /** Default constructor. */
     public ES819TSDBDocValuesFormat() {
-        this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT, BinaryDVCompressionMode.COMPRESSED_WITH_LZ4);
+        this(
+            DEFAULT_SKIP_INDEX_INTERVAL_SIZE,
+            ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL,
+            OPTIMIZED_MERGE_ENABLE_DEFAULT,
+            BinaryDVCompressionMode.COMPRESSED_WITH_LZ4
+        );
     }
 
     public ES819TSDBDocValuesFormat(BinaryDVCompressionMode binaryDVCompressionMode) {
-        this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT, binaryDVCompressionMode);
+        this(
+            DEFAULT_SKIP_INDEX_INTERVAL_SIZE,
+            ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL,
+            OPTIMIZED_MERGE_ENABLE_DEFAULT,
+            binaryDVCompressionMode
+        );
     }
 
     /** Doc values fields format with specified skipIndexIntervalSize. */
-    public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForRangeEncoding, boolean enableOptimizedMerge, BinaryDVCompressionMode binaryDVCompressionMode) {
+    public ES819TSDBDocValuesFormat(
+        int skipIndexIntervalSize,
+        int minDocsPerOrdinalForRangeEncoding,
+        boolean enableOptimizedMerge,
+        BinaryDVCompressionMode binaryDVCompressionMode
+    ) {
         super(CODEC_NAME);
         this.binaryDVCompressionMode = binaryDVCompressionMode;
         if (skipIndexIntervalSize < 2) {
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
index 5ee52f7a4ab8f..00e5667a5d177 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
@@ -340,7 +340,14 @@ public BytesRef binaryValue() throws IOException {
                 }
 
                 @Override
-                public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset, boolean nullsFiltered, BlockDocValuesReader.ToDouble toDouble, boolean toInt) throws IOException {
+                public BlockLoader.Block tryRead(
+                    BlockLoader.BlockFactory factory,
+                    BlockLoader.Docs docs,
+                    int offset,
+                    boolean nullsFiltered,
+                    BlockDocValuesReader.ToDouble toDouble,
+                    boolean toInt
+                ) throws IOException {
                     return null;
                 }
             };
@@ -450,6 +457,7 @@ BytesRef decode(int docNumber) throws IOException {
             return uncompressedBytesRef;
         }
     }
+
     // END: Copied fom LUCENE-9211
     private abstract static class DenseBinaryDocValues extends BinaryDocValues implements BlockLoader.OptionalColumnAtATimeReader {
 
diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java
index eb4755cd9b0f3..e37ec4e1b95c5 100644
--- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java
@@ -43,7 +43,6 @@
 import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec;
 import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec;
 import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode;
-import org.elasticsearch.index.codec.tsdb.DocValuesCodecDuelTests;
 import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests;
 import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseDenseNumericValues;
 import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseSortedDocValues;
diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java
index 0ecb042b3e215..2587942febb14 100644
--- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java
@@ -19,14 +19,24 @@ public class ES819TSDBDocValuesFormatVariableSkipIntervalTests extends ES87TSDBD
     protected Codec getCodec() {
         // small interval size to test with many intervals
         return TestUtil.alwaysDocValuesFormat(
-            new ES819TSDBDocValuesFormat(random().nextInt(4, 16), random().nextInt(1, 32), random().nextBoolean(), ES819TSDBDocValuesFormatTests.randomCompressionMode())
+            new ES819TSDBDocValuesFormat(
+                random().nextInt(4, 16),
+                random().nextInt(1, 32),
+                random().nextBoolean(),
+                ES819TSDBDocValuesFormatTests.randomCompressionMode()
+            )
         );
     }
 
     public void testSkipIndexIntervalSize() {
         IllegalArgumentException ex = expectThrows(
             IllegalArgumentException.class,
-            () -> new ES819TSDBDocValuesFormat(random().nextInt(Integer.MIN_VALUE, 2), random().nextInt(1, 32), random().nextBoolean(), ES819TSDBDocValuesFormatTests.randomCompressionMode())
+            () -> new ES819TSDBDocValuesFormat(
+                random().nextInt(Integer.MIN_VALUE, 2),
+                random().nextInt(1, 32),
+                random().nextBoolean(),
+                ES819TSDBDocValuesFormatTests.randomCompressionMode()
+            )
         );
         assertTrue(ex.getMessage().contains("skipIndexIntervalSize must be > 1"));
     }

From 0bdb7850187de7756564b61c1fce0cf54b41a4dc Mon Sep 17 00:00:00 2001
From: Parker Timmins <parker.timmins@elastic.co>
Date: Tue, 7 Oct 2025 10:24:08 -0500
Subject: [PATCH 15/15] Fix compilation error in benchmark

---
 .../index/codec/tsdb/TSDBDocValuesMergeBenchmark.java          | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java
index a3b2fd3633adf..94a080db539dc 100644
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java
@@ -27,6 +27,7 @@
 import org.elasticsearch.cluster.metadata.DataStream;
 import org.elasticsearch.common.logging.LogConfigurator;
 import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec;
+import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode;
 import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -257,7 +258,7 @@ private static IndexWriterConfig createIndexWriterConfig(boolean optimizedMergeE
         );
         config.setLeafSorter(DataStream.TIMESERIES_LEAF_READERS_SORTER);
         config.setMergePolicy(new LogByteSizeMergePolicy());
-        var docValuesFormat = new ES819TSDBDocValuesFormat(4096, 512, optimizedMergeEnabled);
+        var docValuesFormat = new ES819TSDBDocValuesFormat(4096, 512, optimizedMergeEnabled, BinaryDVCompressionMode.COMPRESSED_WITH_LZ4);
         config.setCodec(new Elasticsearch92Lucene103Codec() {
             @Override
             public DocValuesFormat getDocValuesFormatForField(String field) {