Merged

Commits
74 commits
74880a0
Copy binary compression from LUCENE-9211
parkertimmins Oct 23, 2025
a973713
Initial version of blocks with variable number of values
parkertimmins Oct 23, 2025
3fc95dc
Fix issue with index output unclosed
parkertimmins Oct 23, 2025
c302cc2
Changes docRanges to single limit per block, plus start of 0
parkertimmins Oct 23, 2025
99748c8
Factor block address and block doc offset to accumulator class
parkertimmins Oct 23, 2025
fa2ea11
Rename offset accumulator
parkertimmins Oct 24, 2025
b67dd58
Change lz4 to zstd
parkertimmins Oct 24, 2025
638dbbc
Fix direct monotonic reader size
parkertimmins Oct 24, 2025
fdf3428
Fix docRangeLen bug, use for non-logsdb wildcards
parkertimmins Oct 24, 2025
36b3e10
Change offset encoding from zstd to numeric
parkertimmins Oct 24, 2025
eeded36
[CI] Auto commit changes from spotless
Oct 24, 2025
2d8e6dc
Fix missing compression in es819 format
parkertimmins Oct 25, 2025
efa270f
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Oct 25, 2025
c4d67e5
Store offsets rather than lengths
parkertimmins Oct 25, 2025
06a2035
[CI] Auto commit changes from spotless
Oct 25, 2025
7ccb18d
Remove forbidden APIs
parkertimmins Oct 25, 2025
a57e0d4
[CI] Auto commit changes from spotless
Oct 25, 2025
f156e55
Binary search to find block containing docNum
parkertimmins Oct 27, 2025
91e5842
[CI] Auto commit changes from spotless
Oct 27, 2025
401a041
do not mmap temp offset files
parkertimmins Oct 27, 2025
ad55bc3
feedback
parkertimmins Oct 27, 2025
4d4e153
[CI] Auto commit changes from spotless
Oct 27, 2025
f1ff182
Move zstd (de)compressor to separate class
parkertimmins Oct 27, 2025
9d2f237
Combine doAddCompressedBinary and doAddUncompressedBinary
parkertimmins Oct 27, 2025
2269f9c
[CI] Auto commit changes from spotless
Oct 27, 2025
1c4e9dc
feedback
parkertimmins Oct 28, 2025
3ddb649
Add WildcardRollingUpgradeIT
parkertimmins Oct 28, 2025
dbcd1c6
need new compressor/decompressor for new block writer
parkertimmins Oct 29, 2025
5537d8c
[CI] Auto commit changes from spotless
Oct 29, 2025
d7fce75
Cleanup binaryWriter interface
parkertimmins Oct 29, 2025
bb8361c
Revert "[CI] Auto commit changes from spotless"
parkertimmins Oct 29, 2025
aa3d44f
Revert "Add WildcardRollingUpgradeIT"
parkertimmins Oct 29, 2025
2c1f143
[CI] Auto commit changes from spotless
Oct 29, 2025
636c150
Update code lookup to support other compressors
parkertimmins Oct 29, 2025
09898ff
feedback
parkertimmins Oct 29, 2025
8b8b50b
Update bwc tests
parkertimmins Oct 29, 2025
8a82c23
cleanup
parkertimmins Oct 29, 2025
cef255f
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Oct 29, 2025
718ffc6
Fix test broken from merge
parkertimmins Oct 29, 2025
ebda5b0
Update docs/changelog/137139.yaml
parkertimmins Oct 30, 2025
9fc23f1
Move block address and doc_range accumulators into BlockMetadataAccum…
parkertimmins Oct 30, 2025
49e5425
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Oct 30, 2025
80525bf
Unit tests that require multiple doc value blocks
parkertimmins Oct 31, 2025
b1d4b17
Test values near the size of a block
parkertimmins Oct 31, 2025
e332619
Self close BlockMetadataAcc if throw during construction
parkertimmins Oct 31, 2025
60ebfaa
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 3, 2025
1209e78
Update tsdb doc_values bwc test to mention version 1
parkertimmins Nov 3, 2025
80c14a3
Update docs/changelog/137139.yaml
parkertimmins Nov 3, 2025
602c203
Disable compression for geo_shape type
parkertimmins Nov 4, 2025
d6293d9
Test that wildcard uses ES819 docs encoding and geo_shape does not
parkertimmins Nov 4, 2025
982386e
[CI] Auto commit changes from spotless
Nov 4, 2025
e61b8c2
Add feature flag for binary dv compression
parkertimmins Nov 6, 2025
a225b98
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 7, 2025
f6fd5bd
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 7, 2025
5fe2c80
Add block count threshold in addition to size threshold
parkertimmins Nov 7, 2025
51b21ae
[CI] Auto commit changes from spotless
Nov 7, 2025
07eeb5a
Add test for very small binary values
parkertimmins Nov 7, 2025
d56d12f
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 14, 2025
980df97
Use groupVarInt instead of TSDB encoder
parkertimmins Nov 14, 2025
21a98ac
Don't test bulk loading if compressed, as not implemented
parkertimmins Nov 14, 2025
2239732
[CI] Auto commit changes from spotless
Nov 14, 2025
15823e8
Fix broken merge
parkertimmins Nov 14, 2025
25dcb56
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 14, 2025
200e14c
Revert to using TSDBDocValueEncoder for offsets
parkertimmins Nov 15, 2025
5ca24b4
Better naming and minor optimization
parkertimmins Nov 15, 2025
7f8fa16
Don't need to grow offsets array
parkertimmins Nov 15, 2025
91c23ee
And back to GroupedVarInt, this time with better delta decoding
parkertimmins Nov 17, 2025
92c8050
Add header to control whether block is compressed or uncompressed
parkertimmins Nov 17, 2025
016352a
Handle isCompressed in ES819DocValuesProducer, add bwc tests
parkertimmins Nov 17, 2025
8a2af81
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 17, 2025
026406b
[CI] Auto commit changes from spotless
Nov 17, 2025
d27bb8b
Skip bulk loading tests if compressed
parkertimmins Nov 17, 2025
db68af6
review feedback
parkertimmins Nov 18, 2025
50d9a26
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 19, 2025
Files changed
@@ -27,6 +27,7 @@
import org.elasticsearch.cluster.metadata.DataStream;
import org.elasticsearch.common.logging.LogConfigurator;
import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec;
import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode;
import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -257,7 +258,13 @@ private static IndexWriterConfig createIndexWriterConfig(boolean optimizedMergeEnabled) {
);
config.setLeafSorter(DataStream.TIMESERIES_LEAF_READERS_SORTER);
config.setMergePolicy(new LogByteSizeMergePolicy());
var docValuesFormat = new ES819TSDBDocValuesFormat(4096, 512, optimizedMergeEnabled);
var docValuesFormat = new ES819TSDBDocValuesFormat(
4096,
512,
optimizedMergeEnabled,
BinaryDVCompressionMode.COMPRESSED_ZSTD_LEVEL_1,
true
);
config.setCodec(new Elasticsearch92Lucene103Codec() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
27 changes: 27 additions & 0 deletions docs/changelog/137139.yaml
@@ -0,0 +1,27 @@
pr: 137139
summary: Add binary doc value compression with variable doc count blocks
area: Mapping
type: feature
issues: []
highlight:
title: Add binary doc value compression with variable doc count blocks
  body: |-
    Add compression for binary doc values using Zstd and blocks with a
    variable number of values.

    Block-wise LZ4 was previously added to Lucene in
    [LUCENE-9211](https://issues.apache.org/jira/browse/LUCENE-9211). This
    was subsequently removed in
    [LUCENE-9378](https://issues.apache.org/jira/browse/LUCENE-9378) due to
    query performance issues.

    We investigated adding the original Lucene implementation to ES in
    https://github.com/elastic/elasticsearch/pull/112416 and
    https://github.com/elastic/elasticsearch/pull/105301. This approach
    stores a constant number of values per block (specifically 32 values).
    This is nice because it makes it very easy to map a given value index
    (e.g. docId for dense values) to the block containing it with
    `blockId = docId / 32`. Unfortunately, if values are very large we
    cannot reduce the number of values per block, and (de)compressing a
    block could cause an OOM. Also, since this is a concern, we have to
    keep the number of values per block lower than ideal.

    This PR instead stores a variable number of documents per block. It
    stores a minimum of 1 document per block and stops adding values when
    the size of a block exceeds a threshold. Like the previous version, it
    stores an array of addresses for the start of each block. Additionally,
    it stores a parallel array with the value index at the start of each
    block. When looking up a given value index, if it is not in the current
    block, we binary search the array of value-index starts to find the
    blockId containing the value, then look up the address of the block.
notable: true
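To make the lookup described in the changelog concrete, here is a minimal sketch of the binary search over per-block first-value indexes. The names `blockValueStarts` and `findBlock` are illustrative, not from the PR; the actual format stores this array via DirectMonotonicWriter and reads it back through DirectMonotonicReader.

```java
// Illustrative sketch only: find the block containing a value index by binary
// searching the parallel array of per-block "first value index" entries.
// blockValueStarts[i] is the value index at the start of block i; block 0
// always starts at value index 0.
public class BlockLookupSketch {
    static int findBlock(long[] blockValueStarts, long valueIndex) {
        int lo = 0, hi = blockValueStarts.length - 1;
        while (lo <= hi) {
            int mid = (lo + hi) >>> 1;
            if (blockValueStarts[mid] <= valueIndex) {
                lo = mid + 1; // candidate block; keep looking for a later one
            } else {
                hi = mid - 1;
            }
        }
        return hi; // last block whose first value index is <= valueIndex
    }

    public static void main(String[] args) {
        long[] starts = { 0, 5, 9 }; // blocks hold values [0,5), [5,9), [9,...)
        assert findBlock(starts, 0) == 0;
        assert findBlock(starts, 7) == 1;
        assert findBlock(starts, 9) == 2;
    }
}
```

Once the blockId is known, the parallel block-address array gives the file offset at which to read (and, if needed, decompress) that block.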
@@ -41,6 +41,7 @@
public class PerFieldFormatSupplier {

private static final Set<String> INCLUDE_META_FIELDS;
private static final Set<String> EXCLUDE_MAPPER_TYPES;

static {
// TODO: should we just allow all fields to use tsdb doc values codec?
@@ -53,6 +54,7 @@ public class PerFieldFormatSupplier {
// Don't include the _recovery_source_size and _recovery_source fields, since their values can be trimmed away in
// RecoverySourcePruneMergePolicy, which leads to inconsistencies between merge stats and actual values.
INCLUDE_META_FIELDS = Collections.unmodifiableSet(includeMetaField);
EXCLUDE_MAPPER_TYPES = Set.of("geo_shape");
}

private static final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat();
@@ -145,6 +147,10 @@ boolean useTSDBDocValuesFormat(final String field) {
return false;
}

if (excludeMapperTypes(field)) {
return false;
}

return mapperService != null
&& mapperService.getIndexSettings().useTimeSeriesDocValuesFormat()
&& mapperService.getIndexSettings().isES87TSDBCodecEnabled();
@@ -154,4 +160,29 @@ private boolean excludeFields(String fieldName) {
return fieldName.startsWith("_") && INCLUDE_META_FIELDS.contains(fieldName) == false;
}

private boolean excludeMapperTypes(String fieldName) {
    var typeName = getMapperType(fieldName);
    if (typeName == null) {
        return false;
    }
    return EXCLUDE_MAPPER_TYPES.contains(typeName);
}

private boolean isTimeSeriesModeIndex() {
return mapperService != null && IndexMode.TIME_SERIES == mapperService.getIndexSettings().getMode();
}

private boolean isLogsModeIndex() {
return mapperService != null && IndexMode.LOGSDB == mapperService.getIndexSettings().getMode();
}

String getMapperType(final String field) {
if (mapperService != null) {
Mapper mapper = mapperService.mappingLookup().getMapper(field);
if (mapper != null) {
return mapper.typeName();
}
}
return null;
}
}
@@ -0,0 +1,65 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.tsdb;

import org.apache.lucene.codecs.compressing.CompressionMode;
import org.elasticsearch.index.codec.zstd.ZstdCompressionMode;

public enum BinaryDVCompressionMode {

NO_COMPRESS((byte) 0, null),
COMPRESSED_ZSTD_LEVEL_1((byte) 1, new ZstdCompressionMode(1));

public final byte code;
private final CompressionMode compressionMode;

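// Lookup table indexed by the on-disk code byte; assumes codes are dense and start at 0.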
private static final BinaryDVCompressionMode[] values = new BinaryDVCompressionMode[values().length];
static {
for (BinaryDVCompressionMode mode : values()) {
values[mode.code] = mode;
}
}

BinaryDVCompressionMode(byte code, CompressionMode compressionMode) {
this.code = code;
this.compressionMode = compressionMode;
}

public static BinaryDVCompressionMode fromMode(byte code) {
if (code < 0 || code >= values.length) {
throw new IllegalStateException("unknown compression mode [" + code + "]");
}
return values[code];
}

public CompressionMode compressionMode() {
if (compressionMode == null) {
throw new UnsupportedOperationException("BinaryDVCompressionMode [" + code + "] does not support compression");
}
return compressionMode;
}

public record BlockHeader(boolean isCompressed) {
static final byte IS_COMPRESSED = 0x1;

public static BlockHeader fromByte(byte header) {
boolean isCompressed = (header & IS_COMPRESSED) != 0;
return new BlockHeader(isCompressed);
}

public byte toByte() {
byte header = 0;
if (isCompressed) {
header |= IS_COMPRESSED;
}
return header;
}
}
}
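A short, hypothetical usage sketch of the enum above (not taken from the PR): round-tripping the on-disk mode code and the one-byte per-block header. Per the commit history ("Add header to control whether block is compressed or uncompressed"), the per-block header lets an individual block be stored uncompressed even when the field's mode is a compressed one.

```java
import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode;

public class BinaryDVCompressionModeExample {
    public static void main(String[] args) {
        // Round-trip the on-disk mode code.
        BinaryDVCompressionMode mode = BinaryDVCompressionMode.fromMode((byte) 1);
        assert mode == BinaryDVCompressionMode.COMPRESSED_ZSTD_LEVEL_1;

        // Round-trip the one-byte per-block header; the single bit records
        // whether this particular block was actually compressed.
        byte header = new BinaryDVCompressionMode.BlockHeader(true).toByte();
        assert BinaryDVCompressionMode.BlockHeader.fromByte(header).isCompressed();
    }
}
```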
@@ -0,0 +1,142 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.tsdb.es819;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.packed.DirectMonotonicWriter;
import org.elasticsearch.core.IOUtils;

import java.io.Closeable;
import java.io.IOException;

public final class BlockMetadataAccumulator implements Closeable {

private final DelayedOffsetAccumulator blockAddressAcc;
private final DelayedOffsetAccumulator blockDocRangeAcc;

BlockMetadataAccumulator(Directory dir, IOContext context, IndexOutput data, long addressesStart) throws IOException {
boolean success = false;
try {
blockDocRangeAcc = new DelayedOffsetAccumulator(dir, context, data, "block-doc-ranges", 0);
blockAddressAcc = new DelayedOffsetAccumulator(dir, context, data, "block-addresses", addressesStart);
success = true;
} finally {
if (success == false) {
IOUtils.closeWhileHandlingException(this); // self-close because constructor caller can't
}
}
}

public void addDoc(long numDocsInBlock, long blockLenInBytes) throws IOException {
blockDocRangeAcc.addDoc(numDocsInBlock);
blockAddressAcc.addDoc(blockLenInBytes);
}

public void build(IndexOutput meta, IndexOutput data) throws IOException {
long dataAddressesStart = data.getFilePointer();
blockAddressAcc.build(meta, data);
long dataDocRangeStart = data.getFilePointer();
long addressesLength = dataDocRangeStart - dataAddressesStart;
meta.writeLong(addressesLength);

meta.writeLong(dataDocRangeStart);
blockDocRangeAcc.build(meta, data);
long docRangesLen = data.getFilePointer() - dataDocRangeStart;
meta.writeLong(docRangesLen);
}

@Override
public void close() throws IOException {
IOUtils.closeWhileHandlingException(blockAddressAcc, blockDocRangeAcc);
}

/**
* Like OffsetsAccumulator, builds offsets and stores them with a DirectMonotonicWriter. But writes to a temp file
* rather than directly to a DirectMonotonicWriter, because the number of values is not known up front.
*/
static final class DelayedOffsetAccumulator implements Closeable {

private final Directory dir;
private final long startOffset;

private int numValues = 0;
private final IndexOutput tempOutput;
private final String suffix;

DelayedOffsetAccumulator(Directory dir, IOContext context, IndexOutput data, String suffix, long startOffset) throws IOException {
this.dir = dir;
this.startOffset = startOffset;
this.suffix = suffix;

boolean success = false;
try {
tempOutput = dir.createTempOutput(data.getName(), suffix, context);
CodecUtil.writeHeader(tempOutput, ES819TSDBDocValuesFormat.META_CODEC + suffix, ES819TSDBDocValuesFormat.VERSION_CURRENT);
success = true;
} finally {
if (success == false) {
IOUtils.closeWhileHandlingException(this); // self-close because constructor caller can't
}
}
}

void addDoc(long delta) throws IOException {
tempOutput.writeVLong(delta);
numValues++;
}

void build(IndexOutput meta, IndexOutput data) throws IOException {
CodecUtil.writeFooter(tempOutput);
IOUtils.close(tempOutput);

// write the offsets info to the meta file by reading from temp file
try (ChecksumIndexInput tempInput = dir.openChecksumInput(tempOutput.getName())) {
CodecUtil.checkHeader(
tempInput,
ES819TSDBDocValuesFormat.META_CODEC + suffix,
ES819TSDBDocValuesFormat.VERSION_CURRENT,
ES819TSDBDocValuesFormat.VERSION_CURRENT
);
Throwable priorE = null;
try {
final DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance(
meta,
data,
numValues + 1,
ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
);

long offset = startOffset;
writer.add(offset);
for (int i = 0; i < numValues; ++i) {
offset += tempInput.readVLong();
writer.add(offset);
}
writer.finish();
} catch (Throwable e) {
priorE = e;
} finally {
CodecUtil.checkFooter(tempInput, priorE);
}
}
}

@Override
public void close() throws IOException {
if (tempOutput != null) {
IOUtils.close(tempOutput, () -> dir.deleteFile(tempOutput.getName()));
}
}
}
}
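As a self-contained illustration (not the PR's code), the delta-to-offsets transformation that `build` performs over the temp file amounts to a prefix sum seeded with `startOffset`, producing `numValues + 1` monotonic offsets; block i then spans `[offsets[i], offsets[i+1])` in the data file.

```java
public class OffsetsSketch {
    // What DelayedOffsetAccumulator.build effectively computes: a prefix sum
    // over the per-block deltas written to the temp file. The real code streams
    // the result through a DirectMonotonicWriter rather than materializing it.
    static long[] toOffsets(long startOffset, long[] deltas) {
        long[] offsets = new long[deltas.length + 1];
        offsets[0] = startOffset;
        for (int i = 0; i < deltas.length; i++) {
            offsets[i + 1] = offsets[i] + deltas[i];
        }
        return offsets;
    }

    public static void main(String[] args) {
        // Three blocks of 3, 5, and 2 bytes starting at file offset 0:
        long[] offsets = toOffsets(0, new long[] { 3, 5, 2 });
        // offsets == [0, 3, 8, 10]; block i spans [offsets[i], offsets[i+1]).
        assert java.util.Arrays.equals(offsets, new long[] { 0, 3, 8, 10 });
    }
}
```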