Skip to content

Commit ba91845

Browse files
committed
delta encode
1 parent 8d8ea36 commit ba91845

File tree

2 files changed

+8
-4
lines changed

2 files changed

+8
-4
lines changed

server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@
1616
import org.apache.lucene.search.KnnCollector;
1717
import org.apache.lucene.store.IndexInput;
1818
import org.apache.lucene.util.ArrayUtil;
19-
import org.apache.lucene.util.GroupVIntUtil;
2019
import org.apache.lucene.util.Bits;
2120
import org.apache.lucene.util.VectorUtil;
2221
import org.apache.lucene.util.hnsw.NeighborQueue;
@@ -332,6 +331,7 @@ private static class MemorySegmentPostingsVisitor implements PostingVisitor {
332331
final int[] quantizationScratch;
333332
final byte[] quantizedQueryScratch;
334333
final OptimizedScalarQuantizer quantizer;
334+
final DocIdsWriter idsWriter = new DocIdsWriter();
335335
final float[] correctiveValues = new float[3];
336336
final long quantizedVectorByteSize;
337337

@@ -369,7 +369,7 @@ public int resetPostingsScorer(long offset) throws IOException {
369369
vectors = indexInput.readVInt();
370370
// read the doc ids
371371
assert vectors <= docIdsScratch.length;
372-
GroupVIntUtil.readGroupVInts(indexInput, docIdsScratch, vectors);
372+
idsWriter.readInts(indexInput, vectors, docIdsScratch);
373373
// reconstitute from the deltas
374374
int sum = 0;
375375
for (int i = 0; i < vectors; i++) {

server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -112,6 +112,7 @@ LongValues buildAndWritePostingsLists(
112112
int[] docIds = null;
113113
int[] docDeltas = null;
114114
int[] clusterOrds = null;
115+
DocIdsWriter idsWriter = new DocIdsWriter();
115116
for (int c = 0; c < centroidSupplier.size(); c++) {
116117
float[] centroid = centroidSupplier.centroid(c);
117118
int[] cluster = assignmentsByCluster[c];
@@ -141,11 +142,12 @@ LongValues buildAndWritePostingsLists(
141142
for (int j = 0; j < size; j++) {
142143
docDeltas[j] = j == 0 ? finalDocs[finalOrds[j]] : finalDocs[finalOrds[j]] - finalDocs[finalOrds[j - 1]];
143144
}
145+
final int[] finalDocDeltas = docDeltas;
144146
onHeapQuantizedVectors.reset(centroid, size, ord -> cluster[finalOrds[ord]]);
145147
// TODO we might want to consider putting the docIds in a separate file
146148
// to aid with only having to fetch vectors from slower storage when they are required
147149
// keeping them in the same file indicates we pull the entire file into cache
148-
postingsOutput.writeGroupVInts(docDeltas, size);
150+
idsWriter.writeDocIds(i -> finalDocDeltas[i], size, postingsOutput);
149151
// write vectors
150152
bulkWriter.writeVectors(onHeapQuantizedVectors);
151153
}
@@ -261,6 +263,7 @@ LongValues buildAndWritePostingsLists(
261263
int[] docIds = null;
262264
int[] docDeltas = null;
263265
int[] clusterOrds = null;
266+
DocIdsWriter idsWriter = new DocIdsWriter();
264267
for (int c = 0; c < centroidSupplier.size(); c++) {
265268
float[] centroid = centroidSupplier.centroid(c);
266269
int[] cluster = assignmentsByCluster[c];
@@ -291,11 +294,12 @@ LongValues buildAndWritePostingsLists(
291294
for (int j = 0; j < size; j++) {
292295
docDeltas[j] = j == 0 ? finalDocs[finalOrds[j]] : finalDocs[finalOrds[j]] - finalDocs[finalOrds[j - 1]];
293296
}
297+
final int[] finalDocDeltas = docDeltas;
294298
offHeapQuantizedVectors.reset(size, ord -> isOverspill[finalOrds[ord]], ord -> cluster[finalOrds[ord]]);
295299
// TODO we might want to consider putting the docIds in a separate file
296300
// to aid with only having to fetch vectors from slower storage when they are required
297301
// keeping them in the same file indicates we pull the entire file into cache
298-
postingsOutput.writeGroupVInts(docDeltas, size);
302+
idsWriter.writeDocIds(i -> finalDocDeltas[i], size, postingsOutput);
299303
// write vectors
300304
bulkWriter.writeVectors(offHeapQuantizedVectors);
301305
}

0 commit comments

Comments
 (0)