Skip to content

Commit a201f5c

Browse files
authored
ES819 Binary doc values: compact doc offsets using bit packing (#142772)
The doc offsets are currently delta encoded and stored as grouped vints. For some queries, reading grouped vints is a relatively expensive operation. This PR replaces the grouped vint encoding with a more simplistic bit-packing method. Because this PR changes the on-disk format, a new doc values format is added that extends ES819TSDBDocValuesFormat. Only new indices will use this new doc values format. This is required because otherwise, in mixed clusters, bwc doc values format issues would still occur (even with a new codec version). This is because there is no mechanism that prevents shard recovery from a newer node with a higher codec version to an older node with a lower codec version. Tying the new doc values format to the index version avoids this problem.
1 parent 92ccd73 commit a201f5c

25 files changed

+712
-83
lines changed

benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
import org.elasticsearch.common.logging.LogConfigurator;
2929
import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec;
3030
import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode;
31-
import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat;
31+
import org.elasticsearch.index.codec.tsdb.es819.ES819Version3TSDBDocValuesFormat;
3232
import org.openjdk.jmh.annotations.Benchmark;
3333
import org.openjdk.jmh.annotations.BenchmarkMode;
3434
import org.openjdk.jmh.annotations.Fork;
@@ -54,6 +54,8 @@
5454
import java.util.concurrent.TimeUnit;
5555
import java.util.function.Supplier;
5656

57+
import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.NUMERIC_LARGE_BLOCK_SHIFT;
58+
5759
@BenchmarkMode(Mode.SingleShotTime)
5860
@OutputTimeUnit(TimeUnit.MILLISECONDS)
5961
@State(Scope.Benchmark)
@@ -258,12 +260,13 @@ private static IndexWriterConfig createIndexWriterConfig(boolean optimizedMergeE
258260
);
259261
config.setLeafSorter(DataStream.TIMESERIES_LEAF_READERS_SORTER);
260262
config.setMergePolicy(new LogByteSizeMergePolicy());
261-
var docValuesFormat = new ES819TSDBDocValuesFormat(
263+
var docValuesFormat = new ES819Version3TSDBDocValuesFormat(
262264
4096,
263265
512,
264266
optimizedMergeEnabled,
265267
BinaryDVCompressionMode.COMPRESSED_ZSTD_LEVEL_1,
266-
true
268+
true,
269+
NUMERIC_LARGE_BLOCK_SHIFT
267270
);
268271
config.setCodec(new Elasticsearch92Lucene103Codec() {
269272
@Override

docs/changelog/142772.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
area: Codec
2+
issues: []
3+
pr: 142772
4+
summary: "ES819 Binary doc values: compact doc offsets using bit packing"
5+
type: enhancement

server/src/main/java/module-info.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,7 @@
458458
with
459459
org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat,
460460
org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat,
461+
org.elasticsearch.index.codec.tsdb.es819.ES819Version3TSDBDocValuesFormat,
461462
org.elasticsearch.index.codec.bloomfilter.ES94BloomFilterDocValuesFormat;
462463
provides org.apache.lucene.codecs.KnnVectorsFormat
463464
with

server/src/main/java/org/elasticsearch/index/IndexVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ private static Version parseUnchecked(String version) {
222222
public static final IndexVersion DISK_BBQ_QUANTIZE_BITS = def(9_069_0_00, Version.LUCENE_10_3_2);
223223
public static final IndexVersion ID_FIELD_USE_ES812_POSTINGS_FORMAT = def(9_070_0_00, Version.LUCENE_10_3_2);
224224
public static final IndexVersion TIME_SERIES_USE_SYNTHETIC_ID_94 = def(9_071_0_00, Version.LUCENE_10_3_2);
225+
public static final IndexVersion TIME_SERIES_DOC_VALUES_FORMAT_VERSION_3 = def(9_072_0_00, Version.LUCENE_10_3_2);
225226

226227
/*
227228
* STOP! READ THIS FIRST! No, really,

server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
import org.elasticsearch.index.codec.bloomfilter.ES94BloomFilterDocValuesFormat;
2323
import org.elasticsearch.index.codec.postings.ES812PostingsFormat;
2424
import org.elasticsearch.index.codec.tsdb.TSDBSyntheticIdPostingsFormat;
25-
import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat;
25+
import org.elasticsearch.index.codec.tsdb.es819.TSDBDocValuesFormatFactory;
2626
import org.elasticsearch.index.codec.vectors.es93.ES93HnswVectorsFormat;
2727
import org.elasticsearch.index.mapper.CompletionFieldMapper;
2828
import org.elasticsearch.index.mapper.IdFieldMapper;
@@ -67,8 +67,6 @@ public class PerFieldFormatSupplier {
6767

6868
private static final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat();
6969
private final KnnVectorsFormat knnVectorsFormat;
70-
private static final ES819TSDBDocValuesFormat tsdbDocValuesFormat = ES819TSDBDocValuesFormat.getInstance(false);
71-
private static final ES819TSDBDocValuesFormat tsdbDocValuesFormatLargeNumericBlock = ES819TSDBDocValuesFormat.getInstance(true);
7270
private static final ES812PostingsFormat es812PostingsFormat = new ES812PostingsFormat();
7371
private static final PostingsFormat completionPostingsFormat = PostingsFormat.forName("Completion101");
7472

@@ -197,9 +195,9 @@ public DocValuesFormat getDocValuesFormatForField(String field) {
197195
}
198196

199197
if (useTSDBDocValuesFormat(field)) {
200-
return (mapperService != null && mapperService.getIndexSettings().isUseTimeSeriesDocValuesFormatLargeBlockSize())
201-
? tsdbDocValuesFormatLargeNumericBlock
202-
: tsdbDocValuesFormat;
198+
var indexCreatedVersion = mapperService.getIndexSettings().getIndexVersionCreated();
199+
boolean useLargeBlockSize = mapperService.getIndexSettings().isUseTimeSeriesDocValuesFormatLargeBlockSize();
200+
return TSDBDocValuesFormatFactory.createDocValuesFormat(indexCreatedVersion, useLargeBlockSize);
203201
}
204202

205203
return docValuesFormat;
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.index.codec.tsdb.es819;

import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.GroupVIntUtil;
import org.apache.lucene.util.packed.PackedInts;

import java.io.IOException;
import java.util.Arrays;

/**
 * Represents a codec for encoding and decoding document offsets.
 * Each codec defines custom strategies for compression and decompression
 * of document offsets for compressed binary doc values.
 *
 * <p>Both codecs first delta-encode the offsets in place (each entry becomes the
 * difference from its predecessor) and differ only in how the resulting deltas
 * are serialized. A matching {@link Decoder} must always be used to read data
 * written by the corresponding {@link Encoder}.
 */
enum DocOffsetsCodec {

    /**
     * A codec that uses delta encoding and bit-packing for storage of document offsets.
     * Writes a single header byte holding the number of bits per delta, followed by the
     * deltas packed MSB-first into a contiguous bit stream.
     */
    BITPACKING {
        @Override
        public Encoder getEncoder() {
            return (docOffsets, numDocsInCurrentBlock, output) -> {
                // One offset per doc plus a trailing end offset for the last doc's length.
                int numOffsets = numDocsInCurrentBlock + 1;
                // delta encode in place, back to front so each subtraction reads the
                // still-unmodified predecessor
                int maxDelta = 0;
                for (int i = numOffsets - 1; i > 0; i--) {
                    docOffsets[i] -= docOffsets[i - 1];
                    maxDelta = Math.max(maxDelta, docOffsets[i]);
                }
                // NOTE(review): maxDelta does not include docOffsets[0]; this assumes the
                // first offset of a block is always 0 — confirm against the consumer that
                // fills docOffsets. maxDelta is a non-negative int, so bitsPerValue <= 31.
                int bitsPerValue = maxDelta == 0 ? 0 : PackedInts.bitsRequired(maxDelta);
                output.writeByte((byte) bitsPerValue);
                if (bitsPerValue > 0) {
                    // Pack values MSB-first into a 64-bit accumulator, flushing whole
                    // bytes as they become available (at most 7 residual bits are kept,
                    // so 7 + 31 bits always fits in the long).
                    long accumulator = 0;
                    int bitsInAccumulator = 0;
                    for (int i = 0; i < numOffsets; i++) {
                        accumulator = (accumulator << bitsPerValue) | docOffsets[i];
                        bitsInAccumulator += bitsPerValue;
                        while (bitsInAccumulator >= 8) {
                            bitsInAccumulator -= 8;
                            output.writeByte((byte) (accumulator >>> bitsInAccumulator));
                        }
                    }
                    // Flush the final partial byte, left-aligned and zero-padded.
                    if (bitsInAccumulator > 0) {
                        output.writeByte((byte) (accumulator << (8 - bitsInAccumulator)));
                    }
                }
            };
        }

        @Override
        public Decoder getDecoder() {
            return (docOffsets, numDocsInBlock, input) -> {
                int numOffsets = numDocsInBlock + 1;
                int bitsPerValue = input.readByte() & 0xFF;
                if (bitsPerValue == 0) {
                    // All deltas were zero: every offset is identical to the first (0).
                    Arrays.fill(docOffsets, 0, numOffsets, 0);
                } else {
                    // Mirror of the encoder: refill the accumulator a byte at a time and
                    // extract fixed-width values MSB-first. totalBytes accounts for the
                    // zero-padded final byte.
                    int totalBits = numOffsets * bitsPerValue;
                    int totalBytes = (totalBits + 7) / 8;
                    long accumulator = 0;
                    int bitsInAccumulator = 0;
                    int offsetIndex = 0;
                    // Safe since bitsPerValue <= 31 (see encoder).
                    int mask = (1 << bitsPerValue) - 1;
                    for (int i = 0; i < totalBytes && offsetIndex < numOffsets; i++) {
                        accumulator = (accumulator << 8) | (input.readByte() & 0xFF);
                        bitsInAccumulator += 8;
                        while (bitsInAccumulator >= bitsPerValue && offsetIndex < numOffsets) {
                            bitsInAccumulator -= bitsPerValue;
                            docOffsets[offsetIndex++] = (int) ((accumulator >>> bitsInAccumulator) & mask);
                        }
                    }
                }
                // Undo the delta encoding to recover absolute offsets.
                deltaDecode(docOffsets, numOffsets);
            };
        }
    },
    /**
     * A codec that uses grouped VInts for storage of document offsets.
     * This is the legacy encoding previously hard-coded in the consumer.
     */
    GROUPED_VINT {
        @Override
        public Encoder getEncoder() {
            return (docOffsets, numDocsInCurrentBlock, output) -> {
                int numOffsets = numDocsInCurrentBlock + 1;
                // delta encode in place, back to front (see BITPACKING)
                for (int i = numOffsets - 1; i > 0; i--) {
                    docOffsets[i] -= docOffsets[i - 1];
                }
                output.writeGroupVInts(docOffsets, numOffsets);
            };
        }

        @Override
        public Decoder getDecoder() {
            return (docOffsets, numDocsInBlock, input) -> {
                int numOffsets = numDocsInBlock + 1;
                GroupVIntUtil.readGroupVInts(input, docOffsets, numOffsets);
                // Undo the delta encoding to recover absolute offsets.
                deltaDecode(docOffsets, numOffsets);
            };
        }
    };

    public abstract Encoder getEncoder();

    public abstract Decoder getDecoder();

    /**
     * An encoder to store doc offsets in a more space-efficient format for storage.
     */
    @FunctionalInterface
    public interface Encoder {
        /**
         * Encodes doc offsets in a more space efficient format for storage.
         * Note: the {@code docOffsets} array is modified in place (delta encoded).
         *
         * @param docOffsets an array of document offsets to encode
         * @param numDocsInCurrentBlock the number of documents in the current block to encode
         * @param output the {@link DataOutput} to which the encoded data is written
         * @throws IOException if an I/O error occurs during the encoding process
         */
        void encode(int[] docOffsets, int numDocsInCurrentBlock, DataOutput output) throws IOException;
    }

    /**
     * A decoder to decode the format on disk to doc offsets.
     * A decoder performs the operations that an encoder performs in reverse order.
     */
    @FunctionalInterface
    public interface Decoder {
        /**
         * Decodes the format on disk to doc offsets.
         *
         * @param docOffsets the array to store decoded document offsets
         * @param numDocsInBlock the number of documents in the block to be decoded
         * @param input the input source containing encoded data to be decoded
         * @throws IOException if an I/O error occurs during decoding
         */
        void decode(int[] docOffsets, int numDocsInBlock, DataInput input) throws IOException;
    }

    // Borrowed from TSDBDocValuesEncoder.decodeDelta: turns deltas back into a
    // running prefix sum, in place.
    // The `sum` variable helps compiler optimize method, should not be removed.
    void deltaDecode(int[] arr, int length) {
        int sum = 0;
        for (int i = 0; i < length; ++i) {
            sum += arr[i];
            arr[i] = sum;
        }
    }

}

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,12 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
7878
final SegmentWriteState state;
7979
final BinaryDVCompressionMode binaryDVCompressionMode;
8080
private final boolean enablePerBlockCompression; // only false for testing
81+
private final DocOffsetsCodec.Encoder docOffsetsEncoder;
8182

8283
ES819TSDBDocValuesConsumer(
8384
BinaryDVCompressionMode binaryDVCompressionMode,
8485
final boolean enablePerBlockCompression,
86+
DocOffsetsCodec.Encoder docOffsetsEncoder,
8587
SegmentWriteState state,
8688
int skipIndexIntervalSize,
8789
int minDocsPerOrdinalForOrdinalRangeEncoding,
@@ -94,6 +96,7 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
9496
) throws IOException {
9597
this.binaryDVCompressionMode = binaryDVCompressionMode;
9698
this.enablePerBlockCompression = enablePerBlockCompression;
99+
this.docOffsetsEncoder = docOffsetsEncoder;
97100
this.state = state;
98101
this.termsDictBuffer = new byte[1 << 14];
99102
this.dir = state.directory;
@@ -584,7 +587,7 @@ public void flushData() throws IOException {
584587
maxUncompressedBlockLength = Math.max(maxUncompressedBlockLength, uncompressedBlockLength);
585588
maxNumDocsInAnyBlock = Math.max(maxNumDocsInAnyBlock, numDocsInCurrentBlock);
586589

587-
compressOffsets(data, numDocsInCurrentBlock);
590+
docOffsetsEncoder.encode(docOffsets, numDocsInCurrentBlock, data);
588591

589592
if (shouldCompress) {
590593
compress(block, uncompressedBlockLength, data);
@@ -597,15 +600,6 @@ public void flushData() throws IOException {
597600
numDocsInCurrentBlock = uncompressedBlockLength = 0;
598601
}
599602

600-
void compressOffsets(DataOutput output, int numDocsInCurrentBlock) throws IOException {
601-
int numOffsets = numDocsInCurrentBlock + 1;
602-
// delta encode
603-
for (int i = numOffsets - 1; i > 0; i--) {
604-
docOffsets[i] -= docOffsets[i - 1];
605-
}
606-
output.writeGroupVInts(docOffsets, numOffsets);
607-
}
608-
609603
void compress(byte[] data, int uncompressedLength, DataOutput output) throws IOException {
610604
ByteBuffer inputBuffer = ByteBuffer.wrap(data, 0, uncompressedLength);
611605
ByteBuffersDataInput input = new ByteBuffersDataInput(List.of(inputBuffer));

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValuesFormat {
3939

4040
static final int NUMERIC_BLOCK_SHIFT = 7;
41-
static final int NUMERIC_LARGE_BLOCK_SHIFT = 9;
41+
public static final int NUMERIC_LARGE_BLOCK_SHIFT = 9;
4242
static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16;
4343
static final String CODEC_NAME = "ES819TSDB";
4444
static final String DATA_CODEC = "ES819TSDBDocValuesData";
@@ -73,7 +73,7 @@ public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValues
7373
public static final int BLOCK_COUNT_THRESHOLD = 1024;
7474

7575
// number of documents in an interval
76-
private static final int DEFAULT_SKIP_INDEX_INTERVAL_SIZE = 4096;
76+
static final int DEFAULT_SKIP_INDEX_INTERVAL_SIZE = 4096;
7777
// bytes on an interval:
7878
// * 1 byte : number of levels
7979
// * 16 bytes: min / max value,
@@ -139,6 +139,7 @@ private static boolean getOptimizedMergeEnabledDefault() {
139139
final boolean enableOptimizedMerge;
140140
final BinaryDVCompressionMode binaryDVCompressionMode;
141141
final boolean enablePerBlockCompression;
142+
final DocOffsetsCodec docOffsetsCodec;
142143

143144
public static ES819TSDBDocValuesFormat getInstance(boolean useLargeNumericBlock) {
144145
return useLargeNumericBlock ? new ES819TSDBDocValuesFormat(NUMERIC_LARGE_BLOCK_SHIFT) : new ES819TSDBDocValuesFormat();
@@ -207,7 +208,29 @@ public ES819TSDBDocValuesFormat(
207208
final boolean enablePerBlockCompression,
208209
final int numericBlockShift
209210
) {
210-
super(CODEC_NAME);
211+
this(
212+
CODEC_NAME,
213+
skipIndexIntervalSize,
214+
minDocsPerOrdinalForRangeEncoding,
215+
enableOptimizedMerge,
216+
binaryDVCompressionMode,
217+
enablePerBlockCompression,
218+
numericBlockShift,
219+
DocOffsetsCodec.GROUPED_VINT
220+
);
221+
}
222+
223+
public ES819TSDBDocValuesFormat(
224+
String codecName,
225+
int skipIndexIntervalSize,
226+
int minDocsPerOrdinalForRangeEncoding,
227+
boolean enableOptimizedMerge,
228+
BinaryDVCompressionMode binaryDVCompressionMode,
229+
final boolean enablePerBlockCompression,
230+
final int numericBlockShift,
231+
DocOffsetsCodec docOffsetsCodec
232+
) {
233+
super(codecName);
211234
assert numericBlockShift == NUMERIC_BLOCK_SHIFT || numericBlockShift == NUMERIC_LARGE_BLOCK_SHIFT : numericBlockShift;
212235
if (skipIndexIntervalSize < 2) {
213236
throw new IllegalArgumentException("skipIndexIntervalSize must be > 1, got [" + skipIndexIntervalSize + "]");
@@ -218,13 +241,15 @@ public ES819TSDBDocValuesFormat(
218241
this.binaryDVCompressionMode = binaryDVCompressionMode;
219242
this.enablePerBlockCompression = enablePerBlockCompression;
220243
this.numericBlockShift = numericBlockShift;
244+
this.docOffsetsCodec = docOffsetsCodec;
221245
}
222246

223247
@Override
224248
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
225249
return new ES819TSDBDocValuesConsumer(
226250
binaryDVCompressionMode,
227251
enablePerBlockCompression,
252+
docOffsetsCodec.getEncoder(),
228253
state,
229254
skipIndexIntervalSize,
230255
minDocsPerOrdinalForRangeEncoding,
@@ -239,6 +264,6 @@ public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOExcept
239264

240265
@Override
241266
public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
242-
return new ES819TSDBDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
267+
return new ES819TSDBDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION, docOffsetsCodec.getDecoder());
243268
}
244269
}

0 commit comments

Comments
 (0)