Merged

Changes from 22 commits

74 commits
74880a0
Copy binary compression from LUCENE-9211
parkertimmins Oct 23, 2025
a973713
Initial version of block withs variable number values
parkertimmins Oct 23, 2025
3fc95dc
Fix issue with index output unclosed
parkertimmins Oct 23, 2025
c302cc2
Changes docRanges to single limit per block, plus start of 0
parkertimmins Oct 23, 2025
99748c8
Factor block address and block doc offset to accumulator class
parkertimmins Oct 23, 2025
fa2ea11
Rename offset accumulator
parkertimmins Oct 24, 2025
b67dd58
Change lz4 to zstd
parkertimmins Oct 24, 2025
638dbbc
Fix direct monotonic reader size
parkertimmins Oct 24, 2025
fdf3428
Fix docRangeLen bug, use for non-logsdb wildcards
parkertimmins Oct 24, 2025
36b3e10
Change offset encoding from zstd to numeric
parkertimmins Oct 24, 2025
eeded36
[CI] Auto commit changes from spotless
Oct 24, 2025
2d8e6dc
Fix missing compression in es819 format
parkertimmins Oct 25, 2025
efa270f
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Oct 25, 2025
c4d67e5
Store offsets rather than lengths
parkertimmins Oct 25, 2025
06a2035
[CI] Auto commit changes from spotless
Oct 25, 2025
7ccb18d
Remove forbidden APIs
parkertimmins Oct 25, 2025
a57e0d4
[CI] Auto commit changes from spotless
Oct 25, 2025
f156e55
Binary search to find block containing docNum
parkertimmins Oct 27, 2025
91e5842
[CI] Auto commit changes from spotless
Oct 27, 2025
401a041
do not mmap temp offset files
parkertimmins Oct 27, 2025
ad55bc3
feedback
parkertimmins Oct 27, 2025
4d4e153
[CI] Auto commit changes from spotless
Oct 27, 2025
f1ff182
Move zstd (de)compressor to separate class
parkertimmins Oct 27, 2025
9d2f237
Combine doAddCompressedBinary and doAddUncompressedBinary
parkertimmins Oct 27, 2025
2269f9c
[CI] Auto commit changes from spotless
Oct 27, 2025
1c4e9dc
feedback
parkertimmins Oct 28, 2025
3ddb649
Add WildcardRollingUpgradeIT
parkertimmins Oct 28, 2025
dbcd1c6
need new compressor/decompressor for new block writer
parkertimmins Oct 29, 2025
5537d8c
[CI] Auto commit changes from spotless
Oct 29, 2025
d7fce75
Cleanup binaryWriter interface
parkertimmins Oct 29, 2025
bb8361c
Revert "[CI] Auto commit changes from spotless"
parkertimmins Oct 29, 2025
aa3d44f
Revert "Add WildcardRollingUpgradeIT"
parkertimmins Oct 29, 2025
2c1f143
[CI] Auto commit changes from spotless
Oct 29, 2025
636c150
Update code lookup to support other compressors
parkertimmins Oct 29, 2025
09898ff
feedback
parkertimmins Oct 29, 2025
8b8b50b
Update bwc tests
parkertimmins Oct 29, 2025
8a82c23
cleanup
parkertimmins Oct 29, 2025
cef255f
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Oct 29, 2025
718ffc6
Fix test broken from merge
parkertimmins Oct 29, 2025
ebda5b0
Update docs/changelog/137139.yaml
parkertimmins Oct 30, 2025
9fc23f1
Move block address and doc_range accumulators into BlockMetadataAccum…
parkertimmins Oct 30, 2025
49e5425
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Oct 30, 2025
80525bf
Unit tests that require multiple doc value blocks
parkertimmins Oct 31, 2025
b1d4b17
Test values near the size of a block
parkertimmins Oct 31, 2025
e332619
Self close BlockMetadataAcc if throw during construction
parkertimmins Oct 31, 2025
60ebfaa
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 3, 2025
1209e78
Update tsdb doc_values bwc test to mention version 1
parkertimmins Nov 3, 2025
80c14a3
Update docs/changelog/137139.yaml
parkertimmins Nov 3, 2025
602c203
Disable compression for geo_shape type
parkertimmins Nov 4, 2025
d6293d9
Test that wildcard uses ES819 docs encoding and geo_shape does not
parkertimmins Nov 4, 2025
982386e
[CI] Auto commit changes from spotless
Nov 4, 2025
e61b8c2
Add feature flag for binary dv compression
parkertimmins Nov 6, 2025
a225b98
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 7, 2025
f6fd5bd
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 7, 2025
5fe2c80
Add block count threshold in addition to size threshold
parkertimmins Nov 7, 2025
51b21ae
[CI] Auto commit changes from spotless
Nov 7, 2025
07eeb5a
Add test for very small binary values
parkertimmins Nov 7, 2025
d56d12f
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 14, 2025
980df97
Use groupVarInt instead of TSDB encoder
parkertimmins Nov 14, 2025
21a98ac
Dont test bulk loading if compressed, as not implemented
parkertimmins Nov 14, 2025
2239732
[CI] Auto commit changes from spotless
Nov 14, 2025
15823e8
Fix broken merge
parkertimmins Nov 14, 2025
25dcb56
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 14, 2025
200e14c
Revert to using TSDBDocValueEncoder for offsets
parkertimmins Nov 15, 2025
5ca24b4
Better naming and minor optmization
parkertimmins Nov 15, 2025
7f8fa16
Dont need to grow offsets array
parkertimmins Nov 15, 2025
91c23ee
And back to GroupedVarInt, this time with better delta decoding
parkertimmins Nov 17, 2025
92c8050
Add header to control whether block is compressed or uncompressed
parkertimmins Nov 17, 2025
016352a
Handle isCompressed in ES819DocValuesProducer, add bwc tests
parkertimmins Nov 17, 2025
8a2af81
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 17, 2025
026406b
[CI] Auto commit changes from spotless
Nov 17, 2025
d27bb8b
Skip bulk loading tests if compressed
parkertimmins Nov 17, 2025
db68af6
review feedback
parkertimmins Nov 18, 2025
50d9a26
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 19, 2025
@@ -27,6 +27,7 @@
import org.elasticsearch.cluster.metadata.DataStream;
import org.elasticsearch.common.logging.LogConfigurator;
import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec;
import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode;
import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -257,7 +258,7 @@ private static IndexWriterConfig createIndexWriterConfig(boolean optimizedMergeE
);
config.setLeafSorter(DataStream.TIMESERIES_LEAF_READERS_SORTER);
config.setMergePolicy(new LogByteSizeMergePolicy());
var docValuesFormat = new ES819TSDBDocValuesFormat(4096, 512, optimizedMergeEnabled);
var docValuesFormat = new ES819TSDBDocValuesFormat(4096, 512, optimizedMergeEnabled, BinaryDVCompressionMode.COMPRESSED_WITH_ZSTD);
config.setCodec(new Elasticsearch92Lucene103Codec() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
@@ -0,0 +1,30 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.tsdb;

public enum BinaryDVCompressionMode {

NO_COMPRESS((byte) 0),
COMPRESSED_WITH_ZSTD((byte) 1);

public final byte code;

BinaryDVCompressionMode(byte code) {
this.code = code;
}

public static BinaryDVCompressionMode fromMode(byte mode) {
return switch (mode) {
case 0 -> NO_COMPRESS;
case 1 -> COMPRESSED_WITH_ZSTD;
default -> throw new IllegalStateException("unknown compression mode [" + mode + "]");
};
}
}
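
The mode byte written into the doc-values metadata is decoded with fromMode on the read side. A minimal sketch of the round trip follows; meta and metaIn stand in for the real output/input streams and are illustrative, not code from this PR:

// Writer side: the consumer records which compression mode was used for this field.
meta.writeByte(BinaryDVCompressionMode.COMPRESSED_WITH_ZSTD.code);
// Reader side: the producer restores the mode and picks the matching decode path.
BinaryDVCompressionMode mode = BinaryDVCompressionMode.fromMode(metaIn.readByte());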
@@ -0,0 +1,100 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.tsdb.es819;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.packed.DirectMonotonicWriter;
import org.elasticsearch.core.IOUtils;

import java.io.Closeable;
import java.io.IOException;

/**
 * Like OffsetsAccumulator, builds offsets and stores them in a DirectMonotonicWriter, but writes to a temp file
 * rather than directly to a DirectMonotonicWriter because the number of values is not known up front. If the
 * number of values is known, prefer OffsetsWriter.
*/
final class DelayedOffsetAccumulator implements Closeable {
private final Directory dir;
private final long startOffset;

private int numValues = 0;
private final IndexOutput tempOutput;
private final String suffix;

DelayedOffsetAccumulator(Directory dir, IOContext context, IndexOutput data, String suffix, long startOffset) throws IOException {
this.dir = dir;
this.startOffset = startOffset;
this.suffix = suffix;

boolean success = false;
try {
tempOutput = dir.createTempOutput(data.getName(), suffix, context);
CodecUtil.writeHeader(tempOutput, ES819TSDBDocValuesFormat.META_CODEC + suffix, ES819TSDBDocValuesFormat.VERSION_CURRENT);
success = true;
} finally {
if (success == false) {
IOUtils.closeWhileHandlingException(this); // self-close because constructor caller can't

Contributor: this should be tested

}
}
}

public void addDoc(long delta) throws IOException {
tempOutput.writeVLong(delta);
numValues++;
}

public void build(IndexOutput meta, IndexOutput data) throws IOException {
CodecUtil.writeFooter(tempOutput);
IOUtils.close(tempOutput);

// write the offsets info to the meta file by reading from temp file
try (ChecksumIndexInput tempInput = dir.openChecksumInput(tempOutput.getName())) {
CodecUtil.checkHeader(
tempInput,
ES819TSDBDocValuesFormat.META_CODEC + suffix,
ES819TSDBDocValuesFormat.VERSION_CURRENT,
ES819TSDBDocValuesFormat.VERSION_CURRENT
);
Throwable priorE = null;
try {
final DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance(
meta,
data,
numValues + 1,
ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
);

long offset = startOffset;
writer.add(offset);
for (int i = 0; i < numValues; ++i) {
offset += tempInput.readVLong();
writer.add(offset);
}
writer.finish();
} catch (Throwable e) {
priorE = e;
} finally {
CodecUtil.checkFooter(tempInput, priorE);
}
}
}

@Override
public void close() throws IOException {
if (tempOutput != null) {
IOUtils.close(tempOutput, () -> dir.deleteFile(tempOutput.getName()));
}
}
}
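
A minimal usage sketch of DelayedOffsetAccumulator, following the same pattern the consumer uses further down; dir, context, data, meta and the lengthDeltas loop are assumed/illustrative, not part of this file:

try (DelayedOffsetAccumulator acc = new DelayedOffsetAccumulator(dir, context, data, "example-offsets", data.getFilePointer())) {
    for (long lengthDelta : lengthDeltas) { // e.g. the byte length of each flushed block
        acc.addDoc(lengthDelta);            // buffered as a VLong in the temp file
    }
    acc.build(meta, data);                  // replays the temp file into a DirectMonotonicWriter (numValues + 1 offsets)
}                                           // close() also deletes the temp file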
@@ -27,8 +27,10 @@
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.ByteBuffersDataInput;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.ByteBuffersIndexOutput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
@@ -41,15 +43,20 @@
import org.apache.lucene.util.packed.DirectMonotonicWriter;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode;
import org.elasticsearch.index.codec.tsdb.TSDBDocValuesEncoder;
import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat;

import java.io.Closeable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static org.elasticsearch.index.codec.tsdb.es819.DocValuesConsumerUtil.compatibleWithOptimizedMerge;
import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE;
import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_LEVEL_SHIFT;
import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_MAX_LEVEL;
import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SORTED_SET;
Expand All @@ -65,8 +72,11 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
private final int minDocsPerOrdinalForOrdinalRangeEncoding;
final boolean enableOptimizedMerge;
private final int primarySortFieldNumber;
final SegmentWriteState state;
final BinaryDVCompressionMode binaryDVCompressionMode;

ES819TSDBDocValuesConsumer(
BinaryDVCompressionMode binaryDVCompressionMode,
SegmentWriteState state,
int skipIndexIntervalSize,
int minDocsPerOrdinalForOrdinalRangeEncoding,
@@ -76,6 +86,8 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
String metaCodec,
String metaExtension
) throws IOException {
this.binaryDVCompressionMode = binaryDVCompressionMode;
this.state = state;
this.termsDictBuffer = new byte[1 << 14];
this.dir = state.directory;
this.minDocsPerOrdinalForOrdinalRangeEncoding = minDocsPerOrdinalForOrdinalRangeEncoding;
@@ -315,7 +327,14 @@ public void mergeBinaryField(FieldInfo mergeFieldInfo, MergeState mergeState) th
public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
meta.writeInt(field.number);
meta.writeByte(ES819TSDBDocValuesFormat.BINARY);
meta.writeByte(binaryDVCompressionMode.code);
switch (binaryDVCompressionMode) {
case NO_COMPRESS -> doAddUncompressedBinary(field, valuesProducer);
case COMPRESSED_WITH_ZSTD -> doAddCompressedBinary(field, valuesProducer);
}
}

public void doAddUncompressedBinary(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
if (valuesProducer instanceof TsdbDocValuesProducer tsdbValuesProducer && tsdbValuesProducer.mergeStats.supported()) {
final int numDocsWithField = tsdbValuesProducer.mergeStats.sumNumDocsWithField();
final int minLength = tsdbValuesProducer.mergeStats.minLength();
@@ -444,6 +463,181 @@ public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) th
}
}

public void doAddCompressedBinary(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
try (CompressedBinaryBlockWriter blockWriter = new CompressedBinaryBlockWriter()) {

Contributor: [nit] could use some comments here, explaining what each chunk of code does

BinaryDocValues values = valuesProducer.getBinary(field);
long start = data.getFilePointer();
meta.writeLong(start); // dataOffset
int numDocsWithField = 0;
int minLength = Integer.MAX_VALUE;
int maxLength = 0;
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
numDocsWithField++;
BytesRef v = values.binaryValue();
blockWriter.addDoc(v);
int length = v.length;
minLength = Math.min(length, minLength);
maxLength = Math.max(length, maxLength);
}
blockWriter.flushData();

assert numDocsWithField <= maxDoc;
meta.writeLong(data.getFilePointer() - start); // dataLength

if (numDocsWithField == 0) {
meta.writeLong(-2); // docsWithFieldOffset
meta.writeLong(0L); // docsWithFieldLength
meta.writeShort((short) -1); // jumpTableEntryCount
meta.writeByte((byte) -1); // denseRankPower
} else if (numDocsWithField == maxDoc) {
meta.writeLong(-1); // docsWithFieldOffset
meta.writeLong(0L); // docsWithFieldLength
meta.writeShort((short) -1); // jumpTableEntryCount
meta.writeByte((byte) -1); // denseRankPower
} else {
long offset = data.getFilePointer();
meta.writeLong(offset); // docsWithFieldOffset
values = valuesProducer.getBinary(field);
final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
meta.writeShort(jumpTableEntryCount);
meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
}

meta.writeInt(numDocsWithField);
meta.writeInt(minLength);
meta.writeInt(maxLength);

blockWriter.writeMetaData();
}
}

private class CompressedBinaryBlockWriter implements Closeable {
static final int MIN_BLOCK_BYTES = 256 * 1024;
static final int START_BLOCK_DOCS = 1024;
static final int ZSTD_LEVEL = 1;

final Zstd814StoredFieldsFormat.ZstdCompressor compressor = new Zstd814StoredFieldsFormat.ZstdCompressor(ZSTD_LEVEL);

final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
final long[] docOffsetsCompressBuffer = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
int[] docOffsets = new int[START_BLOCK_DOCS];

int uncompressedBlockLength = 0;
int maxUncompressedBlockLength = 0;
int numDocsInCurrentBlock = 0;

byte[] block = BytesRef.EMPTY_BYTES;
int totalChunks = 0;
long maxPointer = 0;
int maxNumDocsInAnyBlock = 0;

final DelayedOffsetAccumulator blockAddressAcc;
final DelayedOffsetAccumulator blockDocRangeAcc;

CompressedBinaryBlockWriter() throws IOException {
long blockAddressesStart = data.getFilePointer();
blockAddressAcc = new DelayedOffsetAccumulator(state.directory, state.context, data, "block-addresses", blockAddressesStart);

try {
blockDocRangeAcc = new DelayedOffsetAccumulator(state.directory, state.context, data, "block-doc-ranges", 0);
} catch (IOException e) {
blockAddressAcc.close();
throw e;
}
}

void addDoc(BytesRef v) throws IOException {
block = ArrayUtil.grow(block, uncompressedBlockLength + v.length);
System.arraycopy(v.bytes, v.offset, block, uncompressedBlockLength, v.length);
uncompressedBlockLength += v.length;

numDocsInCurrentBlock++;
docOffsets = ArrayUtil.grow(docOffsets, numDocsInCurrentBlock + 1); // need one extra since writing start for next block
docOffsets[numDocsInCurrentBlock] = uncompressedBlockLength;

if (uncompressedBlockLength > MIN_BLOCK_BYTES) {
flushData();
}
}

private void flushData() throws IOException {
if (numDocsInCurrentBlock > 0) {
totalChunks++;
long thisBlockStartPointer = data.getFilePointer();

// write length of string data
data.writeInt(uncompressedBlockLength);

maxUncompressedBlockLength = Math.max(maxUncompressedBlockLength, uncompressedBlockLength);
maxNumDocsInAnyBlock = Math.max(maxNumDocsInAnyBlock, numDocsInCurrentBlock);

compressOffsets(data, numDocsInCurrentBlock);
compress(block, uncompressedBlockLength, data);

blockDocRangeAcc.addDoc(numDocsInCurrentBlock);
numDocsInCurrentBlock = 0;

uncompressedBlockLength = 0;
maxPointer = data.getFilePointer();
long blockLenBytes = maxPointer - thisBlockStartPointer;
blockAddressAcc.addDoc(blockLenBytes);
}
}
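
For orientation, the per-block layout produced by flushData() above can be summarized as follows (reconstructed from the code, comments only):

// [int]   uncompressedBlockLength   - length of the concatenated values in this block
// [bytes] encoded docOffsets        - numDocsInCurrentBlock + 1 offsets, written by compressOffsets()
// [bytes] compressed value data     - zstd-compressed copy of `block`
// The block's total byte length goes to blockAddressAcc and its doc count to blockDocRangeAcc,
// which is what later allows a binary search for the block containing a given docNum.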

void compressOffsets(DataOutput output, int numDocsInCurrentBlock) throws IOException {

Member: Should we encode the lengths using GroupVIntUtil#writeGroupVInts instead? I'm not sure TSDBDocValuesEncoder is suitable for encoding these offsets. Also, always padding 128 offsets may be wasteful.

Contributor Author: Good point, especially after limiting the number of docs per block to 1024 the padding could be a concern. Sounds good, I'll give this a try 👍

Contributor Author: Hmm, so I'm seeing a slow-down with readGroupVInts on some benchmark queries. Mostly small decreases that could be noise, but some in the 25-40% range that are concerning. I'd think that GroupVIntUtil would be quite fast. Is there possibly something I'm missing in the decompression code that could speed it up? I'm currently benchmarking with uncompressed offsets to get a baseline for offset (de)compression.

int batchStart = 0;
int numOffsets = numDocsInCurrentBlock + 1;
while (batchStart < numOffsets) {
int batchLength = Math.min(numOffsets - batchStart, NUMERIC_BLOCK_SIZE);
for (int i = 0; i < batchLength; i++) {
docOffsetsCompressBuffer[i] = docOffsets[batchStart + i];
}
if (batchLength < docOffsetsCompressBuffer.length) {
Arrays.fill(docOffsetsCompressBuffer, batchLength, docOffsetsCompressBuffer.length, 0);
}
encoder.encode(docOffsetsCompressBuffer, output);
batchStart += batchLength;
}
}
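
The thread above discusses replacing TSDBDocValuesEncoder with group-varint for the offsets (see also commits 980df97 and 91c23ee). A rough sketch of that alternative, assuming the bulk writeGroupVInts/readGroupVInts methods on DataOutput/DataInput available in recent Lucene releases; the helper class, names and exact signatures are illustrative, not the code this PR ships:

import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;

import java.io.IOException;

final class GroupVIntOffsetCodec { // hypothetical helper, for illustration only
    static void write(DataOutput out, int[] docOffsets, int numOffsets) throws IOException {
        long[] deltas = new long[numOffsets];
        for (int i = 0; i < numOffsets; i++) {
            deltas[i] = docOffsets[i] - (i == 0 ? 0 : docOffsets[i - 1]); // offsets are monotonic, so deltas stay small
        }
        out.writeGroupVInts(deltas, numOffsets); // no padding to a fixed batch size
    }

    static void read(DataInput in, long[] offsets, int numOffsets) throws IOException {
        in.readGroupVInts(offsets, numOffsets);
        for (int i = 1; i < numOffsets; i++) {
            offsets[i] += offsets[i - 1]; // prefix-sum after the bulk read ("better delta decoding")
        }
    }
}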

void compress(byte[] data, int uncompressedLength, DataOutput output) throws IOException {

Member: If we use Zstd directly, should we also handle cases where compression does not reduce storage and store the raw bytes instead?

Contributor Author: I like the idea of not compressing if it doesn't help. This would still apply with non-direct Zstd, right? I guess for non-direct Zstd we'd need a separate output buffer to check the length before sending it to the output.

I discussed this with Martijn and he suggested adding a signal byte now, which says whether or not the data is compressed. It would always be set to true for now, but we can support false once we add direct Zstd and enable this optimization. What do you think?

ByteBuffer inputBuffer = ByteBuffer.wrap(data, 0, uncompressedLength);
ByteBuffersDataInput input = new ByteBuffersDataInput(List.of(inputBuffer));
compressor.compress(input, output);

Member: Should we use Zstd from NativeAccess directly to avoid copying data to an intermediate buffer before the native buffer?

Contributor Author: My only concern is that this currently uses Lucene's Compressor/CompressionMode, which will make it easy to add other compressors. On the other hand, as we previously spoke about, it might make sense to use LZ4 to partially decompress blocks. If that is the case, we may not want to use the Compressor interface ... though I'm actually not sure either way.

Anyway, I split a hacky version of this off here, and will benchmark it to see if it's worth doing.

Contributor Author: I ran some benchmarks on the above hacky version and got some weird results. Some of the queries got a nice throughput increase. The weird part is that the Store Size increased by an amount that was not reflected in the output of disk_usage. There must be a bug in my version that is causing this.

To keep this PR small(er), what do you think about switching to NativeAccess directly in a separate PR?

}
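
A minimal sketch of the "signal byte" idea from the thread above: only keep the compressed form when it actually saves space, and record which form was written so the producer can decode accordingly. It reuses the surrounding class's compressor field; compressOrStoreRaw and the flag values are illustrative, not the code in this PR:

void compressOrStoreRaw(byte[] bytes, int uncompressedLength, DataOutput output) throws IOException {
    ByteBuffersDataOutput scratch = new ByteBuffersDataOutput();
    ByteBuffer inputBuffer = ByteBuffer.wrap(bytes, 0, uncompressedLength);
    compressor.compress(new ByteBuffersDataInput(List.of(inputBuffer)), scratch);
    if (scratch.size() < uncompressedLength) {
        output.writeByte((byte) 1);                      // 1 = block stored zstd-compressed
        scratch.copyTo(output);
    } else {
        output.writeByte((byte) 0);                      // 0 = compression did not help, block stored raw
        output.writeBytes(bytes, 0, uncompressedLength);
    }
}

A later commit in this PR (92c8050) adds such a header so the producer knows whether a block is compressed.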

void writeMetaData() throws IOException {
if (totalChunks == 0) {
return;
}

long dataAddressesStart = data.getFilePointer();

meta.writeLong(dataAddressesStart);
meta.writeVInt(totalChunks);
meta.writeVInt(maxUncompressedBlockLength);
meta.writeVInt(maxNumDocsInAnyBlock);
meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);

blockAddressAcc.build(meta, data);
long dataDocRangeStart = data.getFilePointer();
long addressesLength = dataDocRangeStart - dataAddressesStart;
meta.writeLong(addressesLength);

meta.writeLong(dataDocRangeStart);
blockDocRangeAcc.build(meta, data);
long docRangesLen = data.getFilePointer() - dataDocRangeStart;
meta.writeLong(docRangesLen);
}

@Override
public void close() throws IOException {
blockDocRangeAcc.close();
blockAddressAcc.close();
}
}
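
For reference, a sketch of how a producer could read back what writeMetaData() emits, mirroring the write order exactly; metaIn is an IndexInput over the meta file, names are illustrative, and the real ES819 producer may differ. This only applies when totalChunks > 0, since writeMetaData() returns early otherwise:

long dataAddressesStart = metaIn.readLong();
int totalChunks = metaIn.readVInt();
int maxUncompressedBlockLength = metaIn.readVInt();
int maxNumDocsInAnyBlock = metaIn.readVInt();
int blockShift = metaIn.readVInt();
DirectMonotonicReader.Meta addressesMeta = DirectMonotonicReader.loadMeta(metaIn, totalChunks + 1, blockShift); // from blockAddressAcc.build
long addressesLength = metaIn.readLong();
long dataDocRangeStart = metaIn.readLong();
DirectMonotonicReader.Meta docRangesMeta = DirectMonotonicReader.loadMeta(metaIn, totalChunks + 1, blockShift); // from blockDocRangeAcc.build
long docRangesLength = metaIn.readLong();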

@Override
public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
meta.writeInt(field.number);