delta encode: write and read IVF posting-list doc ID deltas with DocIdsWriter instead of GroupVInt

benwtrent · benwtrent · commit ba91845a433f · 2025-08-11T17:39:24.000-04:00
diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java
@@ -16,7 +16,6 @@
 import org.apache.lucene.search.KnnCollector;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.GroupVIntUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.VectorUtil;
 import org.apache.lucene.util.hnsw.NeighborQueue;
@@ -332,6 +331,7 @@ private static class MemorySegmentPostingsVisitor implements PostingVisitor {
         final int[] quantizationScratch;
         final byte[] quantizedQueryScratch;
         final OptimizedScalarQuantizer quantizer;
+        final DocIdsWriter idsWriter = new DocIdsWriter();
         final float[] correctiveValues = new float[3];
         final long quantizedVectorByteSize;
 
@@ -369,7 +369,7 @@ public int resetPostingsScorer(long offset) throws IOException {
             vectors = indexInput.readVInt();
             // read the doc ids
             assert vectors <= docIdsScratch.length;
-            GroupVIntUtil.readGroupVInts(indexInput, docIdsScratch, vectors);
+            idsWriter.readInts(indexInput, vectors, docIdsScratch);
             // reconstitute from the deltas
             int sum = 0;
             for (int i = 0; i < vectors; i++) {
diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java
@@ -112,6 +112,7 @@ LongValues buildAndWritePostingsLists(
         int[] docIds = null;
         int[] docDeltas = null;
         int[] clusterOrds = null;
+        DocIdsWriter idsWriter = new DocIdsWriter();
         for (int c = 0; c < centroidSupplier.size(); c++) {
             float[] centroid = centroidSupplier.centroid(c);
             int[] cluster = assignmentsByCluster[c];
@@ -141,11 +142,12 @@ LongValues buildAndWritePostingsLists(
             for (int j = 0; j < size; j++) {
                 docDeltas[j] = j == 0 ? finalDocs[finalOrds[j]] : finalDocs[finalOrds[j]] - finalDocs[finalOrds[j - 1]];
             }
+            final int[] finalDocDeltas = docDeltas;
             onHeapQuantizedVectors.reset(centroid, size, ord -> cluster[finalOrds[ord]]);
             // TODO we might want to consider putting the docIds in a separate file
             // to aid with only having to fetch vectors from slower storage when they are required
             // keeping them in the same file indicates we pull the entire file into cache
-            postingsOutput.writeGroupVInts(docDeltas, size);
+            idsWriter.writeDocIds(i -> finalDocDeltas[i], size, postingsOutput);
             // write vectors
             bulkWriter.writeVectors(onHeapQuantizedVectors);
         }
@@ -261,6 +263,7 @@ LongValues buildAndWritePostingsLists(
             int[] docIds = null;
             int[] docDeltas = null;
             int[] clusterOrds = null;
+            DocIdsWriter idsWriter = new DocIdsWriter();
             for (int c = 0; c < centroidSupplier.size(); c++) {
                 float[] centroid = centroidSupplier.centroid(c);
                 int[] cluster = assignmentsByCluster[c];
@@ -291,11 +294,12 @@ LongValues buildAndWritePostingsLists(
                 for (int j = 0; j < size; j++) {
                     docDeltas[j] = j == 0 ? finalDocs[finalOrds[j]] : finalDocs[finalOrds[j]] - finalDocs[finalOrds[j - 1]];
                 }
+                final int[] finalDocDeltas = docDeltas;
                 offHeapQuantizedVectors.reset(size, ord -> isOverspill[finalOrds[ord]], ord -> cluster[finalOrds[ord]]);
                 // TODO we might want to consider putting the docIds in a separate file
                 // to aid with only having to fetch vectors from slower storage when they are required
                 // keeping them in the same file indicates we pull the entire file into cache
-                postingsOutput.writeGroupVInts(docDeltas, size);
+                idsWriter.writeDocIds(i -> finalDocDeltas[i], size, postingsOutput);
                 // write vectors
                 bulkWriter.writeVectors(offHeapQuantizedVectors);
             }