From 4d2841e37eb5224b3b470b71a07ef92fdbd37313 Mon Sep 17 00:00:00 2001 From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com> Date: Mon, 11 Aug 2025 14:45:06 -0400 Subject: [PATCH 1/6] Sort postings lists by doc ID --- .../vectors/DefaultIVFVectorsReader.java | 9 +- .../vectors/DefaultIVFVectorsWriter.java | 84 +++++++++++++++++-- 2 files changed, 86 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java index 312172f251dda..2aec61ddd8aa5 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java @@ -16,6 +16,7 @@ import org.apache.lucene.search.KnnCollector; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.GroupVIntUtil; import org.apache.lucene.util.VectorUtil; import org.apache.lucene.util.hnsw.NeighborQueue; import org.elasticsearch.index.codec.vectors.reflect.OffHeapStats; @@ -370,7 +371,13 @@ public int resetPostingsScorer(long offset) throws IOException { vectors = indexInput.readVInt(); // read the doc ids assert vectors <= docIdsScratch.length; - docIdsWriter.readInts(indexInput, vectors, docIdsScratch); + GroupVIntUtil.readGroupVInts(indexInput, docIdsScratch, vectors); + // reconstitute from the deltas + int sum = 0; + for (int i = 1; i < vectors; i++) { + sum += docIdsScratch[i]; + docIdsScratch[i] = sum; + } slicePos = indexInput.getFilePointer(); return vectors; } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java index d16163d6934e8..6e5f09538d785 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java @@ -17,6 +17,7 @@ import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.IntroSorter; import org.apache.lucene.util.LongValues; import org.apache.lucene.util.VectorUtil; import org.apache.lucene.util.hnsw.IntToIntFunction; @@ -101,7 +102,6 @@ LongValues buildAndWritePostingsLists( postingsOutput.writeVInt(maxPostingListSize); // write the posting lists final PackedLongValues.Builder offsets = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); - DocIdsWriter docIdsWriter = new DocIdsWriter(); DiskBBQBulkWriter bulkWriter = new DiskBBQBulkWriter.OneBitDiskBBQBulkWriter(ES91OSQVectorsScorer.BULK_SIZE, postingsOutput); OnHeapQuantizedVectors onHeapQuantizedVectors = new OnHeapQuantizedVectors( floatVectorValues, @@ -109,6 +109,9 @@ LongValues buildAndWritePostingsLists( new OptimizedScalarQuantizer(fieldInfo.getVectorSimilarityFunction()) ); final ByteBuffer buffer = ByteBuffer.allocate(fieldInfo.getVectorDimension() * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN); + int[] docIds = null; + int[] docDeltas = null; + int[] clusterOrds = null; for (int c = 0; c < centroidSupplier.size(); c++) { float[] centroid = centroidSupplier.centroid(c); int[] cluster = assignmentsByCluster[c]; @@ -121,11 +124,28 @@ LongValues buildAndWritePostingsLists( int size = cluster.length; // write docIds postingsOutput.writeVInt(size); - 
onHeapQuantizedVectors.reset(centroid, size, ord -> cluster[ord]); + if (docIds == null || docIds.length < cluster.length) { + docIds = new int[cluster.length]; + clusterOrds = new int[cluster.length]; + docDeltas = new int[cluster.length]; + } + for (int j = 0; j < size; j++) { + docIds[j] = floatVectorValues.ordToDoc(cluster[j]); + clusterOrds[j] = j; + } + final int[] finalDocs = docIds; + final int[] finalOrds = clusterOrds; + // sort cluster.buffer by docIds values, this way cluster ordinals are sorted by docIds + new IntSorter(clusterOrds, i -> finalDocs[i]).sort(0, size); + // encode doc deltas + for (int j = 0; j < size; j++) { + docDeltas[j] = j == 0 ? finalDocs[finalOrds[j]] : finalDocs[finalOrds[j]] - finalDocs[finalOrds[j - 1]]; + } + onHeapQuantizedVectors.reset(centroid, size, ord -> cluster[finalOrds[ord]]); // TODO we might want to consider putting the docIds in a separate file // to aid with only having to fetch vectors from slower storage when they are required // keeping them in the same file indicates we pull the entire file into cache - docIdsWriter.writeDocIds(j -> floatVectorValues.ordToDoc(cluster[j]), size, postingsOutput); + postingsOutput.writeGroupVInts(docDeltas, size); // write vectors bulkWriter.writeVectors(onHeapQuantizedVectors); } @@ -233,12 +253,14 @@ LongValues buildAndWritePostingsLists( quantizedVectorsInput, fieldInfo.getVectorDimension() ); - DocIdsWriter docIdsWriter = new DocIdsWriter(); DiskBBQBulkWriter bulkWriter = new DiskBBQBulkWriter.OneBitDiskBBQBulkWriter(ES91OSQVectorsScorer.BULK_SIZE, postingsOutput); final ByteBuffer buffer = ByteBuffer.allocate(fieldInfo.getVectorDimension() * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN); // write the max posting list size postingsOutput.writeVInt(maxPostingListSize); // write the posting lists + int[] docIds = null; + int[] docDeltas = null; + int[] clusterOrds = null; for (int c = 0; c < centroidSupplier.size(); c++) { float[] centroid = centroidSupplier.centroid(c); int[] cluster = assignmentsByCluster[c]; @@ -252,11 +274,28 @@ LongValues buildAndWritePostingsLists( // write docIds int size = cluster.length; postingsOutput.writeVInt(size); - offHeapQuantizedVectors.reset(size, ord -> isOverspill[ord], ord -> cluster[ord]); + if (docIds == null || docIds.length < cluster.length) { + docIds = new int[cluster.length]; + clusterOrds = new int[cluster.length]; + docDeltas = new int[cluster.length]; + } + for (int j = 0; j < size; j++) { + docIds[j] = floatVectorValues.ordToDoc(cluster[j]); + clusterOrds[j] = j; + } + final int[] finalDocs = docIds; + final int[] finalOrds = clusterOrds; + // sort cluster.buffer by docIds values, this way cluster ordinals are sorted by docIds + new IntSorter(clusterOrds, i -> finalDocs[i]).sort(0, size); + // encode doc deltas + for (int j = 0; j < size; j++) { + docDeltas[j] = j == 0 ? 
finalDocs[finalOrds[j]] : finalDocs[finalOrds[j]] - finalDocs[finalOrds[j - 1]]; + } + offHeapQuantizedVectors.reset(size, ord -> isOverspill[finalOrds[ord]], ord -> cluster[finalOrds[ord]]); // TODO we might want to consider putting the docIds in a separate file // to aid with only having to fetch vectors from slower storage when they are required // keeping them in the same file indicates we pull the entire file into cache - docIdsWriter.writeDocIds(j -> floatVectorValues.ordToDoc(cluster[j]), size, postingsOutput); + postingsOutput.writeGroupVInts(docDeltas, size); // write vectors bulkWriter.writeVectors(offHeapQuantizedVectors); } @@ -717,4 +756,37 @@ public void readQuantizedVector(int ord, boolean isOverspill) throws IOException bitSum = quantizedVectorsInput.readShort(); } } + + private static class IntSorter extends IntroSorter { + int pivot = -1; + private final int[] arr; + private final IntToIntFunction func; + + private IntSorter(int[] arr, IntToIntFunction func) { + this.arr = arr; + this.func = func; + } + + @Override + protected void setPivot(int i) { + pivot = func.apply(arr[i]); + } + + @Override + protected int comparePivot(int j) { + return Integer.compare(pivot, func.apply(arr[j])); + } + + @Override + protected int compare(int a, int b) { + return Integer.compare(func.apply(arr[a]), func.apply(arr[b])); + } + + @Override + protected void swap(int i, int j) { + final int tmp = arr[i]; + arr[i] = arr[j]; + arr[j] = tmp; + } + } } From 8d8ea3611320e930b664fbb028e9ffb1f3e31964 Mon Sep 17 00:00:00 2001 From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com> Date: Mon, 11 Aug 2025 16:22:42 -0400 Subject: [PATCH 2/6] iter --- .../index/codec/vectors/DefaultIVFVectorsReader.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java index 8759eb3d5420d..6460ef4b1766f 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java @@ -327,7 +327,6 @@ private static class MemorySegmentPostingsVisitor implements PostingVisitor { final float[] centroid; long slicePos; OptimizedScalarQuantizer.QuantizationResult queryCorrections; - DocIdsWriter docIdsWriter = new DocIdsWriter(); final float[] scratch; final int[] quantizationScratch; @@ -373,7 +372,7 @@ public int resetPostingsScorer(long offset) throws IOException { GroupVIntUtil.readGroupVInts(indexInput, docIdsScratch, vectors); // reconstitute from the deltas int sum = 0; - for (int i = 1; i < vectors; i++) { + for (int i = 0; i < vectors; i++) { sum += docIdsScratch[i]; docIdsScratch[i] = sum; } From ba91845a433f226c1def39f1ace919b8c4173e41 Mon Sep 17 00:00:00 2001 From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com> Date: Mon, 11 Aug 2025 17:39:24 -0400 Subject: [PATCH 3/6] delta encode --- .../index/codec/vectors/DefaultIVFVectorsReader.java | 4 ++-- .../index/codec/vectors/DefaultIVFVectorsWriter.java | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java index 6460ef4b1766f..d80fc216e556c 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java 
+++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java @@ -16,7 +16,6 @@ import org.apache.lucene.search.KnnCollector; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.GroupVIntUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.VectorUtil; import org.apache.lucene.util.hnsw.NeighborQueue; @@ -332,6 +331,7 @@ private static class MemorySegmentPostingsVisitor implements PostingVisitor { final int[] quantizationScratch; final byte[] quantizedQueryScratch; final OptimizedScalarQuantizer quantizer; + final DocIdsWriter idsWriter = new DocIdsWriter(); final float[] correctiveValues = new float[3]; final long quantizedVectorByteSize; @@ -369,7 +369,7 @@ public int resetPostingsScorer(long offset) throws IOException { vectors = indexInput.readVInt(); // read the doc ids assert vectors <= docIdsScratch.length; - GroupVIntUtil.readGroupVInts(indexInput, docIdsScratch, vectors); + idsWriter.readInts(indexInput, vectors, docIdsScratch); // reconstitute from the deltas int sum = 0; for (int i = 0; i < vectors; i++) { diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java index 6e5f09538d785..444ff9cf18a0a 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java @@ -112,6 +112,7 @@ LongValues buildAndWritePostingsLists( int[] docIds = null; int[] docDeltas = null; int[] clusterOrds = null; + DocIdsWriter idsWriter = new DocIdsWriter(); for (int c = 0; c < centroidSupplier.size(); c++) { float[] centroid = centroidSupplier.centroid(c); int[] cluster = assignmentsByCluster[c]; @@ -141,11 +142,12 @@ LongValues buildAndWritePostingsLists( for (int j = 0; j < size; j++) { docDeltas[j] = j == 0 ? finalDocs[finalOrds[j]] : finalDocs[finalOrds[j]] - finalDocs[finalOrds[j - 1]]; } + final int[] finalDocDeltas = docDeltas; onHeapQuantizedVectors.reset(centroid, size, ord -> cluster[finalOrds[ord]]); // TODO we might want to consider putting the docIds in a separate file // to aid with only having to fetch vectors from slower storage when they are required // keeping them in the same file indicates we pull the entire file into cache - postingsOutput.writeGroupVInts(docDeltas, size); + idsWriter.writeDocIds(i -> finalDocDeltas[i], size, postingsOutput); // write vectors bulkWriter.writeVectors(onHeapQuantizedVectors); } @@ -261,6 +263,7 @@ LongValues buildAndWritePostingsLists( int[] docIds = null; int[] docDeltas = null; int[] clusterOrds = null; + DocIdsWriter idsWriter = new DocIdsWriter(); for (int c = 0; c < centroidSupplier.size(); c++) { float[] centroid = centroidSupplier.centroid(c); int[] cluster = assignmentsByCluster[c]; @@ -291,11 +294,12 @@ LongValues buildAndWritePostingsLists( for (int j = 0; j < size; j++) { docDeltas[j] = j == 0 ? 
finalDocs[finalOrds[j]] : finalDocs[finalOrds[j]] - finalDocs[finalOrds[j - 1]]; } + final int[] finalDocDeltas = docDeltas; offHeapQuantizedVectors.reset(size, ord -> isOverspill[finalOrds[ord]], ord -> cluster[finalOrds[ord]]); // TODO we might want to consider putting the docIds in a separate file // to aid with only having to fetch vectors from slower storage when they are required // keeping them in the same file indicates we pull the entire file into cache - postingsOutput.writeGroupVInts(docDeltas, size); + idsWriter.writeDocIds(i -> finalDocDeltas[i], size, postingsOutput); // write vectors bulkWriter.writeVectors(offHeapQuantizedVectors); } From 757c14949cf9399ad85b598d29555dc925cab539 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Tue, 12 Aug 2025 09:57:06 -0400 Subject: [PATCH 4/6] Update DocIdsWriter.java --- .../index/codec/vectors/DocIdsWriter.java | 81 ++----------------- 1 file changed, 5 insertions(+), 76 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DocIdsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DocIdsWriter.java index d46a6301b60a2..05ae6467b4033 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DocIdsWriter.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DocIdsWriter.java @@ -23,8 +23,6 @@ import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.DocBaseBitSetIterator; -import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.LongsRef; import org.apache.lucene.util.hnsw.IntToIntFunction; @@ -42,7 +40,6 @@ final class DocIdsWriter { public static final int DEFAULT_MAX_POINTS_IN_LEAF_NODE = 512; private static final byte CONTINUOUS_IDS = (byte) -2; - private static final byte BITSET_IDS = (byte) -1; private static final byte DELTA_BPV_16 = (byte) 16; private static final byte BPV_21 = (byte) 21; private static final byte BPV_24 = (byte) 24; @@ -92,21 +89,11 @@ void writeDocIds(IntToIntFunction docIds, int count, DataOutput out) throws IOEx } int min2max = max - min + 1; - if (strictlySorted) { - if (min2max == count) { - // continuous ids, typically happens when segment is sorted - out.writeByte(CONTINUOUS_IDS); - out.writeVInt(docIds.apply(0)); - return; - } else if (min2max <= (count << 4)) { - assert min2max > count : "min2max: " + min2max + ", count: " + count; - // Only trigger bitset optimization when max - min + 1 <= 16 * count in order to avoid - // expanding too much storage. - // A field with lower cardinality will have higher probability to trigger this optimization. 
- out.writeByte(BITSET_IDS); - writeIdsAsBitSet(docIds, count, out); - return; - } + if (strictlySorted && min2max == count) { + // continuous ids, typically happens when segment is sorted + out.writeByte(CONTINUOUS_IDS); + out.writeVInt(docIds.apply(0)); + return; } if (min2max <= 0xFFFF) { @@ -180,38 +167,6 @@ void writeDocIds(IntToIntFunction docIds, int count, DataOutput out) throws IOEx } } - private static void writeIdsAsBitSet(IntToIntFunction docIds, int count, DataOutput out) throws IOException { - int min = docIds.apply(0); - int max = docIds.apply(count - 1); - - final int offsetWords = min >> 6; - final int offsetBits = offsetWords << 6; - final int totalWordCount = FixedBitSet.bits2words(max - offsetBits + 1); - long currentWord = 0; - int currentWordIndex = 0; - - out.writeVInt(offsetWords); - out.writeVInt(totalWordCount); - // build bit set streaming - for (int i = 0; i < count; i++) { - final int index = docIds.apply(i) - offsetBits; - final int nextWordIndex = index >> 6; - assert currentWordIndex <= nextWordIndex; - if (currentWordIndex < nextWordIndex) { - out.writeLong(currentWord); - currentWord = 0L; - currentWordIndex++; - while (currentWordIndex < nextWordIndex) { - currentWordIndex++; - out.writeLong(0L); - } - } - currentWord |= 1L << index; - } - out.writeLong(currentWord); - assert currentWordIndex + 1 == totalWordCount; - } - /** Read {@code count} integers into {@code docIDs}. */ void readInts(IndexInput in, int count, int[] docIDs) throws IOException { if (count == 0) { @@ -225,9 +180,6 @@ void readInts(IndexInput in, int count, int[] docIDs) throws IOException { case CONTINUOUS_IDS: readContinuousIds(in, count, docIDs); break; - case BITSET_IDS: - readBitSet(in, count, docIDs); - break; case DELTA_BPV_16: readDelta16(in, count, docIDs); break; @@ -245,20 +197,6 @@ void readInts(IndexInput in, int count, int[] docIDs) throws IOException { } } - private DocIdSetIterator readBitSetIterator(IndexInput in, int count) throws IOException { - int offsetWords = in.readVInt(); - int longLen = in.readVInt(); - scratchLongs.longs = ArrayUtil.growNoCopy(scratchLongs.longs, longLen); - in.readLongs(scratchLongs.longs, 0, longLen); - // make ghost bits clear for FixedBitSet. 
- if (longLen < scratchLongs.length) { - Arrays.fill(scratchLongs.longs, longLen, scratchLongs.longs.length, 0); - } - scratchLongs.length = longLen; - FixedBitSet bitSet = new FixedBitSet(scratchLongs.longs, longLen << 6); - return new DocBaseBitSetIterator(bitSet, count, offsetWords << 6); - } - private static void readContinuousIds(IndexInput in, int count, int[] docIDs) throws IOException { int start = in.readVInt(); for (int i = 0; i < count; i++) { @@ -266,15 +204,6 @@ private static void readContinuousIds(IndexInput in, int count, int[] docIDs) th } } - private void readBitSet(IndexInput in, int count, int[] docIDs) throws IOException { - DocIdSetIterator iterator = readBitSetIterator(in, count); - int docId, pos = 0; - while ((docId = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - docIDs[pos++] = docId; - } - assert pos == count : "pos: " + pos + ", count: " + count; - } - private static void readDelta16(IndexInput in, int count, int[] docIds) throws IOException { final int min = in.readVInt(); final int half = count >> 1; From 78643cd2de8e834fff64ec76d6de7e2ba652d19c Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 12 Aug 2025 14:06:46 +0000 Subject: [PATCH 5/6] [CI] Auto commit changes from spotless --- .../org/elasticsearch/index/codec/vectors/DocIdsWriter.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DocIdsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DocIdsWriter.java index 05ae6467b4033..257a1340eeff1 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DocIdsWriter.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DocIdsWriter.java @@ -19,16 +19,13 @@ package org.elasticsearch.index.codec.vectors; import org.apache.lucene.index.PointValues.IntersectVisitor; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.LongsRef; import org.apache.lucene.util.hnsw.IntToIntFunction; import java.io.IOException; -import java.util.Arrays; /** * This class is used to write and read the doc ids in a compressed format. 
The format is optimized From 7fa53d10c78aa372c0347f1aa8ce7fa4dff5bfd3 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Tue, 12 Aug 2025 10:37:30 -0400 Subject: [PATCH 6/6] Update DefaultIVFVectorsWriter.java --- .../vectors/DefaultIVFVectorsWriter.java | 44 ++++++------------- 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java index 444ff9cf18a0a..5e696b74530a8 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java @@ -109,9 +109,9 @@ LongValues buildAndWritePostingsLists( new OptimizedScalarQuantizer(fieldInfo.getVectorSimilarityFunction()) ); final ByteBuffer buffer = ByteBuffer.allocate(fieldInfo.getVectorDimension() * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN); - int[] docIds = null; - int[] docDeltas = null; - int[] clusterOrds = null; + final int[] docIds = new int[maxPostingListSize]; + final int[] docDeltas = new int[maxPostingListSize]; + final int[] clusterOrds = new int[maxPostingListSize]; DocIdsWriter idsWriter = new DocIdsWriter(); for (int c = 0; c < centroidSupplier.size(); c++) { float[] centroid = centroidSupplier.centroid(c); @@ -125,29 +125,21 @@ LongValues buildAndWritePostingsLists( int size = cluster.length; // write docIds postingsOutput.writeVInt(size); - if (docIds == null || docIds.length < cluster.length) { - docIds = new int[cluster.length]; - clusterOrds = new int[cluster.length]; - docDeltas = new int[cluster.length]; - } for (int j = 0; j < size; j++) { docIds[j] = floatVectorValues.ordToDoc(cluster[j]); clusterOrds[j] = j; } - final int[] finalDocs = docIds; - final int[] finalOrds = clusterOrds; // sort cluster.buffer by docIds values, this way cluster ordinals are sorted by docIds - new IntSorter(clusterOrds, i -> finalDocs[i]).sort(0, size); + new IntSorter(clusterOrds, i -> docIds[i]).sort(0, size); // encode doc deltas for (int j = 0; j < size; j++) { - docDeltas[j] = j == 0 ? finalDocs[finalOrds[j]] : finalDocs[finalOrds[j]] - finalDocs[finalOrds[j - 1]]; + docDeltas[j] = j == 0 ? 
docIds[clusterOrds[j]] : docIds[clusterOrds[j]] - docIds[clusterOrds[j - 1]]; } - final int[] finalDocDeltas = docDeltas; - onHeapQuantizedVectors.reset(centroid, size, ord -> cluster[finalOrds[ord]]); + onHeapQuantizedVectors.reset(centroid, size, ord -> cluster[clusterOrds[ord]]); // TODO we might want to consider putting the docIds in a separate file // to aid with only having to fetch vectors from slower storage when they are required // keeping them in the same file indicates we pull the entire file into cache - idsWriter.writeDocIds(i -> finalDocDeltas[i], size, postingsOutput); + idsWriter.writeDocIds(i -> docDeltas[i], size, postingsOutput); // write vectors bulkWriter.writeVectors(onHeapQuantizedVectors); } @@ -260,9 +252,9 @@ LongValues buildAndWritePostingsLists( // write the max posting list size postingsOutput.writeVInt(maxPostingListSize); // write the posting lists - int[] docIds = null; - int[] docDeltas = null; - int[] clusterOrds = null; + final int[] docIds = new int[maxPostingListSize]; + final int[] docDeltas = new int[maxPostingListSize]; + final int[] clusterOrds = new int[maxPostingListSize]; DocIdsWriter idsWriter = new DocIdsWriter(); for (int c = 0; c < centroidSupplier.size(); c++) { float[] centroid = centroidSupplier.centroid(c); @@ -277,29 +269,21 @@ LongValues buildAndWritePostingsLists( // write docIds int size = cluster.length; postingsOutput.writeVInt(size); - if (docIds == null || docIds.length < cluster.length) { - docIds = new int[cluster.length]; - clusterOrds = new int[cluster.length]; - docDeltas = new int[cluster.length]; - } for (int j = 0; j < size; j++) { docIds[j] = floatVectorValues.ordToDoc(cluster[j]); clusterOrds[j] = j; } - final int[] finalDocs = docIds; - final int[] finalOrds = clusterOrds; // sort cluster.buffer by docIds values, this way cluster ordinals are sorted by docIds - new IntSorter(clusterOrds, i -> finalDocs[i]).sort(0, size); + new IntSorter(clusterOrds, i -> docIds[i]).sort(0, size); // encode doc deltas for (int j = 0; j < size; j++) { - docDeltas[j] = j == 0 ? finalDocs[finalOrds[j]] : finalDocs[finalOrds[j]] - finalDocs[finalOrds[j - 1]]; + docDeltas[j] = j == 0 ? docIds[clusterOrds[j]] : docIds[clusterOrds[j]] - docIds[clusterOrds[j - 1]]; } - final int[] finalDocDeltas = docDeltas; - offHeapQuantizedVectors.reset(size, ord -> isOverspill[finalOrds[ord]], ord -> cluster[finalOrds[ord]]); + offHeapQuantizedVectors.reset(size, ord -> isOverspill[clusterOrds[ord]], ord -> cluster[clusterOrds[ord]]); // TODO we might want to consider putting the docIds in a separate file // to aid with only having to fetch vectors from slower storage when they are required // keeping them in the same file indicates we pull the entire file into cache - idsWriter.writeDocIds(i -> finalDocDeltas[i], size, postingsOutput); + idsWriter.writeDocIds(i -> docDeltas[i], size, postingsOutput); // write vectors bulkWriter.writeVectors(offHeapQuantizedVectors); }
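
For reference, a minimal self-contained sketch of the sort-then-delta-encode scheme these patches converge on: ordinals in a cluster are indirectly sorted by their doc IDs, the sorted doc IDs are written as deltas (first value absolute), and the reader reconstitutes them with a prefix sum. It assumes a plain int[] ordToDoc mapping in place of FloatVectorValues, uses java.util sorting instead of Lucene's IntroSorter, and skips the DocIdsWriter bit-packing; class and method names are illustrative, not the Lucene/Elasticsearch API.

import java.util.Arrays;
import java.util.Comparator;

final class SortedPostingsSketch {

    /** Returns cluster positions reordered so their doc IDs are ascending (indirect sort). */
    static Integer[] sortByDocId(int[] cluster, int[] ordToDoc) {
        Integer[] positions = new Integer[cluster.length];
        for (int i = 0; i < positions.length; i++) {
            positions[i] = i;
        }
        // reorder positions only; cluster itself is untouched, mirroring IntSorter over clusterOrds
        Arrays.sort(positions, Comparator.comparingInt(p -> ordToDoc[cluster[p]]));
        return positions;
    }

    /** Writer side: encode the sorted doc IDs as deltas; j == 0 stores the absolute doc ID. */
    static int[] encodeDeltas(int[] cluster, int[] ordToDoc, Integer[] sortedPositions) {
        int[] deltas = new int[sortedPositions.length];
        int previous = 0;
        for (int j = 0; j < sortedPositions.length; j++) {
            int doc = ordToDoc[cluster[sortedPositions[j]]];
            deltas[j] = doc - previous;
            previous = doc;
        }
        return deltas;
    }

    /** Reader side: prefix-sum the deltas back into absolute doc IDs, as in resetPostingsScorer. */
    static int[] decodeDeltas(int[] deltas) {
        int[] docs = new int[deltas.length];
        int sum = 0;
        for (int i = 0; i < deltas.length; i++) {
            sum += deltas[i];
            docs[i] = sum;
        }
        return docs;
    }

    public static void main(String[] args) {
        int[] ordToDoc = { 42, 7, 19, 3, 25 };   // hypothetical ord -> docId mapping
        int[] cluster = { 0, 2, 3, 4 };          // vector ordinals assigned to one centroid
        Integer[] order = sortByDocId(cluster, ordToDoc);
        int[] deltas = encodeDeltas(cluster, ordToDoc, order);
        System.out.println(Arrays.toString(deltas));               // [3, 16, 6, 17]
        System.out.println(Arrays.toString(decodeDeltas(deltas))); // [3, 19, 25, 42]
    }
}

Note that deltas[0] carries the absolute doc ID, which is why the reader's prefix sum starts at i = 0 after the fix in PATCH 2/6.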