From fc84d16186a97b9e424cedcb30e772108a2590df Mon Sep 17 00:00:00 2001 From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com> Date: Thu, 4 Sep 2025 15:47:46 -0400 Subject: [PATCH 1/5] Block encode doc ids --- .../vectors/DefaultIVFVectorsReader.java | 46 ++++++++++++------- .../vectors/DefaultIVFVectorsWriter.java | 34 ++++++++++---- .../codec/vectors/DiskBBQBulkWriter.java | 16 +++++-- 3 files changed, 68 insertions(+), 28 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java index a38122596a9a8..e50b7f18f186c 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java @@ -376,7 +376,9 @@ private static class MemorySegmentPostingsVisitor implements PostingVisitor { final float[] correctionsUpper = new float[BULK_SIZE]; final int[] correctionsSum = new int[BULK_SIZE]; final float[] correctionsAdd = new float[BULK_SIZE]; - final int[] docIdsScratch; + final int[] docIdsScratch = new int[BULK_SIZE]; + byte docEncoding; + int docBase = 0; int vectors; boolean quantized = false; @@ -415,7 +417,6 @@ private static class MemorySegmentPostingsVisitor implements PostingVisitor { quantizedVectorByteSize = (discretizedDimensions / 8); quantizer = new OptimizedScalarQuantizer(fieldInfo.getVectorSimilarityFunction(), DEFAULT_LAMBDA, 1); osqVectorsScorer = ESVectorUtil.getES91OSQVectorsScorer(indexInput, fieldInfo.getVectorDimension()); - this.docIdsScratch = new int[maxPostingListSize]; } @Override @@ -425,15 +426,8 @@ public int resetPostingsScorer(long offset) throws IOException { indexInput.readFloats(centroid, 0, centroid.length); centroidDp = Float.intBitsToFloat(indexInput.readInt()); vectors = indexInput.readVInt(); - // read the doc ids - assert vectors <= docIdsScratch.length; - idsWriter.readInts(indexInput, vectors, docIdsScratch); - // reconstitute from the deltas - int sum = 0; - for (int i = 0; i < vectors; i++) { - sum += docIdsScratch[i]; - docIdsScratch[i] = sum; - } + docEncoding = indexInput.readByte(); + docBase = 0; slicePos = indexInput.getFilePointer(); return vectors; } @@ -495,9 +489,9 @@ private static int docToBulkScore(int[] docIds, int offset, Bits acceptDocs) { return docToScore; } - private static void collectBulk(int[] docIds, int offset, KnnCollector knnCollector, float[] scores) { + private void collectBulk(int offset, KnnCollector knnCollector, float[] scores) { for (int i = 0; i < ES91OSQVectorsScorer.BULK_SIZE; i++) { - final int doc = docIds[offset + i]; + final int doc = docIdsScratch[offset + i]; if (doc != -1) { knnCollector.collect(doc, scores[i]); } @@ -511,8 +505,16 @@ public int visit(KnnCollector knnCollector) throws IOException { int scoredDocs = 0; int limit = vectors - BULK_SIZE + 1; int i = 0; + // read Docs for (; i < limit; i += BULK_SIZE) { - final int docsToBulkScore = acceptDocs == null ? BULK_SIZE : docToBulkScore(docIdsScratch, i, acceptDocs); + // read the doc ids + idsWriter.readInts(indexInput, BULK_SIZE, docEncoding, docIdsScratch); + // reconstitute from the deltas + for (int j = 0; j < BULK_SIZE; j++) { + docBase += docIdsScratch[j]; + docIdsScratch[j] = docBase; + } + final int docsToBulkScore = acceptDocs == null ? 
BULK_SIZE : docToBulkScore(docIdsScratch, 0, acceptDocs); if (docsToBulkScore == 0) { indexInput.skipBytes(quantizedByteLength * BULK_SIZE); continue; @@ -520,7 +522,7 @@ public int visit(KnnCollector knnCollector) throws IOException { quantizeQueryIfNecessary(); final float maxScore; if (docsToBulkScore < BULK_SIZE / 2) { - maxScore = scoreIndividually(i); + maxScore = scoreIndividually(0); } else { maxScore = osqVectorsScorer.scoreBulk( quantizedQueryScratch, @@ -534,13 +536,23 @@ public int visit(KnnCollector knnCollector) throws IOException { ); } if (knnCollector.minCompetitiveSimilarity() < maxScore) { - collectBulk(docIdsScratch, i, knnCollector, scores); + collectBulk(0, knnCollector, scores); } scoredDocs += docsToBulkScore; } // process tail + // read the doc ids + if (i < vectors) { + idsWriter.readInts(indexInput, vectors - i, docEncoding, docIdsScratch); + // reconstitute from the deltas + for (int j = 0; j < vectors - i; j++) { + docBase += docIdsScratch[j]; + docIdsScratch[j] = docBase; + } + } + int count = 0; for (; i < vectors; i++) { - int doc = docIdsScratch[i]; + int doc = docIdsScratch[count]; if (acceptDocs == null || acceptDocs.get(doc)) { quantizeQueryIfNecessary(); float qcDist = osqVectorsScorer.quantizeScore(quantizedQueryScratch); diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java index c91e32aa9fccb..04febed1c45e7 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java @@ -103,7 +103,10 @@ CentroidOffsetAndLength buildAndWritePostingsLists( // write the posting lists final PackedLongValues.Builder offsets = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); final PackedLongValues.Builder lengths = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); - DiskBBQBulkWriter bulkWriter = new DiskBBQBulkWriter.OneBitDiskBBQBulkWriter(ES91OSQVectorsScorer.BULK_SIZE, postingsOutput); + DiskBBQBulkWriter.OneBitDiskBBQBulkWriter bulkWriter = new DiskBBQBulkWriter.OneBitDiskBBQBulkWriter( + ES91OSQVectorsScorer.BULK_SIZE, + postingsOutput + ); OnHeapQuantizedVectors onHeapQuantizedVectors = new OnHeapQuantizedVectors( floatVectorValues, fieldInfo.getVectorDimension(), @@ -138,12 +141,17 @@ CentroidOffsetAndLength buildAndWritePostingsLists( docDeltas[j] = j == 0 ? 
docIds[clusterOrds[j]] : docIds[clusterOrds[j]] - docIds[clusterOrds[j - 1]]; } onHeapQuantizedVectors.reset(centroid, size, ord -> cluster[clusterOrds[ord]]); + byte encoding = idsWriter.calculateBlockEncoding(i -> docDeltas[i], size, ES91OSQVectorsScorer.BULK_SIZE); + postingsOutput.writeByte(encoding); // TODO we might want to consider putting the docIds in a separate file // to aid with only having to fetch vectors from slower storage when they are required // keeping them in the same file indicates we pull the entire file into cache - idsWriter.writeDocIds(i -> docDeltas[i], size, postingsOutput); + // idsWriter.writeDocIds(i -> docDeltas[i], size, postingsOutput); // write vectors - bulkWriter.writeVectors(onHeapQuantizedVectors); + bulkWriter.writeVectors(onHeapQuantizedVectors, i -> { + // for vector i we write `bulk` size docs or the remaining docs + idsWriter.writeDocIds(d -> docDeltas[i + d], Math.min(ES91OSQVectorsScorer.BULK_SIZE, size - i), encoding, postingsOutput); + }); lengths.add(postingsOutput.getFilePointer() - fileOffset - offset); } @@ -287,13 +295,23 @@ CentroidOffsetAndLength buildAndWritePostingsLists( for (int j = 0; j < size; j++) { docDeltas[j] = j == 0 ? docIds[clusterOrds[j]] : docIds[clusterOrds[j]] - docIds[clusterOrds[j - 1]]; } + byte encoding = idsWriter.calculateBlockEncoding(i -> docDeltas[i], size, ES91OSQVectorsScorer.BULK_SIZE); + postingsOutput.writeByte(encoding); offHeapQuantizedVectors.reset(size, ord -> isOverspill[clusterOrds[ord]], ord -> cluster[clusterOrds[ord]]); // TODO we might want to consider putting the docIds in a separate file // to aid with only having to fetch vectors from slower storage when they are required // keeping them in the same file indicates we pull the entire file into cache - idsWriter.writeDocIds(i -> docDeltas[i], size, postingsOutput); + // idsWriter.writeDocIds(i -> docDeltas[i], size, postingsOutput); // write vectors - bulkWriter.writeVectors(offHeapQuantizedVectors); + bulkWriter.writeVectors(offHeapQuantizedVectors, i -> { + // for vector i we write `bulk` size docs or the remaining docs + idsWriter.writeDocIds( + d -> docDeltas[d + i], + Math.min(ES91OSQVectorsScorer.BULK_SIZE, size - i), + encoding, + postingsOutput + ); + }); lengths.add(postingsOutput.getFilePointer() - fileOffset - offset); // lengths.add(1); } @@ -381,7 +399,7 @@ private void writeCentroidsWithParents( osq, globalCentroid ); - bulkWriter.writeVectors(parentQuantizeCentroid); + bulkWriter.writeVectors(parentQuantizeCentroid, null); int offset = 0; for (int i = 0; i < centroidGroups.centroids().length; i++) { centroidOutput.writeInt(offset); @@ -398,7 +416,7 @@ private void writeCentroidsWithParents( for (int i = 0; i < centroidGroups.centroids().length; i++) { final int[] centroidAssignments = centroidGroups.vectors()[i]; childrenQuantizeCentroid.reset(idx -> centroidAssignments[idx], centroidAssignments.length); - bulkWriter.writeVectors(childrenQuantizeCentroid); + bulkWriter.writeVectors(childrenQuantizeCentroid, null); } // write the centroid offsets at the end of the file for (int i = 0; i < centroidGroups.centroids().length; i++) { @@ -429,7 +447,7 @@ private void writeCentroidsWithoutParents( osq, globalCentroid ); - bulkWriter.writeVectors(quantizedCentroids); + bulkWriter.writeVectors(quantizedCentroids, null); // write the centroid offsets at the end of the file for (int i = 0; i < centroidSupplier.size(); i++) { centroidOutput.writeLong(centroidOffsetAndLength.offsets().get(i)); diff --git 
a/server/src/main/java/org/elasticsearch/index/codec/vectors/DiskBBQBulkWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DiskBBQBulkWriter.java
index 1d7e7f74f6c14..4c1cc27286a63 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DiskBBQBulkWriter.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DiskBBQBulkWriter.java
@@ -9,6 +9,7 @@
 
 package org.elasticsearch.index.codec.vectors;
 
+import org.apache.lucene.search.CheckedIntConsumer;
 import org.apache.lucene.store.IndexOutput;
 
 import java.io.IOException;
@@ -27,7 +28,8 @@ protected DiskBBQBulkWriter(int bulkSize, IndexOutput out) {
         this.out = out;
     }
 
-    abstract void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv) throws IOException;
+    abstract void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
+        throws IOException;
 
     static class OneBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
         private final OptimizedScalarQuantizer.QuantizationResult[] corrections;
@@ -38,10 +40,14 @@ static class OneBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
         }
 
         @Override
-        void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv) throws IOException {
+        void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
+            throws IOException {
             int limit = qvv.count() - bulkSize + 1;
             int i = 0;
             for (; i < limit; i += bulkSize) {
+                if (docsWriter != null) {
+                    docsWriter.accept(i);
+                }
                 for (int j = 0; j < bulkSize; j++) {
                     byte[] qv = qvv.next();
                     corrections[j] = qvv.getCorrections();
@@ -49,6 +55,9 @@ void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv) throws IOEx
                 }
                 writeCorrections(corrections);
             }
+            if (i < qvv.count() && docsWriter != null) {
+                docsWriter.accept(i);
+            }
             // write tail
             for (; i < qvv.count(); ++i) {
                 byte[] qv = qvv.next();
@@ -94,7 +103,8 @@ static class SevenBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
         }
 
         @Override
-        void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv) throws IOException {
+        void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
+            throws IOException {
             int limit = qvv.count() - bulkSize + 1;
             int i = 0;
             for (; i < limit; i += bulkSize) {

From 945bac764953e0ab72c47d8145d61a72f2ec039b Mon Sep 17 00:00:00 2001
From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com>
Date: Thu, 4 Sep 2025 16:13:59 -0400
Subject: [PATCH 2/5] adjust reader

---
 .../vectors/DefaultIVFVectorsReader.java      | 23 +++++++++----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java
index e50b7f18f186c..2a9e4ea6d7e5c 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java
@@ -498,6 +498,15 @@ private void collectBulk(int offset, KnnCollector knnCollector, float[] scores)
         }
     }
 
+    private void readDocIds(int count) throws IOException {
+        idsWriter.readInts(indexInput, count, docEncoding, docIdsScratch);
+        // reconstitute from the deltas
+        for (int j = 0; j < count; j++) {
+            docBase += docIdsScratch[j];
+            docIdsScratch[j] = docBase;
+        }
+    }
+
     @Override
     public int visit(KnnCollector knnCollector) throws IOException {
         indexInput.seek(slicePos);
@@ -508,12 +517,7 @@ public int visit(KnnCollector 
knnCollector) throws IOException { // read Docs for (; i < limit; i += BULK_SIZE) { // read the doc ids - idsWriter.readInts(indexInput, BULK_SIZE, docEncoding, docIdsScratch); - // reconstitute from the deltas - for (int j = 0; j < BULK_SIZE; j++) { - docBase += docIdsScratch[j]; - docIdsScratch[j] = docBase; - } + readDocIds(BULK_SIZE); final int docsToBulkScore = acceptDocs == null ? BULK_SIZE : docToBulkScore(docIdsScratch, 0, acceptDocs); if (docsToBulkScore == 0) { indexInput.skipBytes(quantizedByteLength * BULK_SIZE); @@ -543,12 +547,7 @@ public int visit(KnnCollector knnCollector) throws IOException { // process tail // read the doc ids if (i < vectors) { - idsWriter.readInts(indexInput, vectors - i, docEncoding, docIdsScratch); - // reconstitute from the deltas - for (int j = 0; j < vectors - i; j++) { - docBase += docIdsScratch[j]; - docIdsScratch[j] = docBase; - } + readDocIds(vectors - i); } int count = 0; for (; i < vectors; i++) { From 1f05dd66d40f4ecc6f4ff5b3845f70e9bd630e5f Mon Sep 17 00:00:00 2001 From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com> Date: Thu, 4 Sep 2025 16:17:51 -0400 Subject: [PATCH 3/5] iter --- .../vectors/DefaultIVFVectorsReader.java | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java index 2a9e4ea6d7e5c..6696dd6cacd86 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java @@ -432,11 +432,11 @@ public int resetPostingsScorer(long offset) throws IOException { return vectors; } - private float scoreIndividually(int offset) throws IOException { + private float scoreIndividually() throws IOException { float maxScore = Float.NEGATIVE_INFINITY; // score individually, first the quantized byte chunk for (int j = 0; j < BULK_SIZE; j++) { - int doc = docIdsScratch[j + offset]; + int doc = docIdsScratch[j]; if (doc != -1) { float qcDist = osqVectorsScorer.quantizeScore(quantizedQueryScratch); scores[j] = qcDist; @@ -453,7 +453,7 @@ private float scoreIndividually(int offset) throws IOException { indexInput.readFloats(correctionsAdd, 0, BULK_SIZE); // Now apply corrections for (int j = 0; j < BULK_SIZE; j++) { - int doc = docIdsScratch[offset + j]; + int doc = docIdsScratch[j]; if (doc != -1) { scores[j] = osqVectorsScorer.score( queryCorrections.lowerInterval(), @@ -476,22 +476,21 @@ private float scoreIndividually(int offset) throws IOException { return maxScore; } - private static int docToBulkScore(int[] docIds, int offset, Bits acceptDocs) { + private static int docToBulkScore(int[] docIds, Bits acceptDocs) { assert acceptDocs != null : "acceptDocs must not be null"; int docToScore = ES91OSQVectorsScorer.BULK_SIZE; for (int i = 0; i < ES91OSQVectorsScorer.BULK_SIZE; i++) { - final int idx = offset + i; - if (acceptDocs.get(docIds[idx]) == false) { - docIds[idx] = -1; + if (acceptDocs.get(docIds[i]) == false) { + docIds[i] = -1; docToScore--; } } return docToScore; } - private void collectBulk(int offset, KnnCollector knnCollector, float[] scores) { + private void collectBulk(KnnCollector knnCollector, float[] scores) { for (int i = 0; i < ES91OSQVectorsScorer.BULK_SIZE; i++) { - final int doc = docIdsScratch[offset + i]; + final int doc = docIdsScratch[i]; if (doc != -1) { knnCollector.collect(doc, 
scores[i]); } @@ -518,7 +517,7 @@ public int visit(KnnCollector knnCollector) throws IOException { for (; i < limit; i += BULK_SIZE) { // read the doc ids readDocIds(BULK_SIZE); - final int docsToBulkScore = acceptDocs == null ? BULK_SIZE : docToBulkScore(docIdsScratch, 0, acceptDocs); + final int docsToBulkScore = acceptDocs == null ? BULK_SIZE : docToBulkScore(docIdsScratch, acceptDocs); if (docsToBulkScore == 0) { indexInput.skipBytes(quantizedByteLength * BULK_SIZE); continue; @@ -526,7 +525,7 @@ public int visit(KnnCollector knnCollector) throws IOException { quantizeQueryIfNecessary(); final float maxScore; if (docsToBulkScore < BULK_SIZE / 2) { - maxScore = scoreIndividually(0); + maxScore = scoreIndividually(); } else { maxScore = osqVectorsScorer.scoreBulk( quantizedQueryScratch, @@ -540,7 +539,7 @@ public int visit(KnnCollector knnCollector) throws IOException { ); } if (knnCollector.minCompetitiveSimilarity() < maxScore) { - collectBulk(0, knnCollector, scores); + collectBulk(knnCollector, scores); } scoredDocs += docsToBulkScore; } From e122a0486b34ee42bf4a2278ea08592ceb7beb47 Mon Sep 17 00:00:00 2001 From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com> Date: Thu, 4 Sep 2025 17:14:19 -0400 Subject: [PATCH 4/5] fixing iteration --- .../index/codec/vectors/DefaultIVFVectorsReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java index 6696dd6cacd86..7bba3d94a7e96 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java @@ -550,7 +550,7 @@ public int visit(KnnCollector knnCollector) throws IOException { } int count = 0; for (; i < vectors; i++) { - int doc = docIdsScratch[count]; + int doc = docIdsScratch[count++]; if (acceptDocs == null || acceptDocs.get(doc)) { quantizeQueryIfNecessary(); float qcDist = osqVectorsScorer.quantizeScore(quantizedQueryScratch); From 3c9aa1fadaffa869fec778b1322bedc9b7a88085 Mon Sep 17 00:00:00 2001 From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com> Date: Fri, 5 Sep 2025 06:55:50 -0400 Subject: [PATCH 5/5] fix up a bit --- .../codec/vectors/DefaultIVFVectorsWriter.java | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java index 04febed1c45e7..91e4d29690660 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java @@ -103,10 +103,7 @@ CentroidOffsetAndLength buildAndWritePostingsLists( // write the posting lists final PackedLongValues.Builder offsets = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); final PackedLongValues.Builder lengths = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); - DiskBBQBulkWriter.OneBitDiskBBQBulkWriter bulkWriter = new DiskBBQBulkWriter.OneBitDiskBBQBulkWriter( - ES91OSQVectorsScorer.BULK_SIZE, - postingsOutput - ); + DiskBBQBulkWriter bulkWriter = new DiskBBQBulkWriter.OneBitDiskBBQBulkWriter(ES91OSQVectorsScorer.BULK_SIZE, postingsOutput); OnHeapQuantizedVectors onHeapQuantizedVectors = new OnHeapQuantizedVectors( 
floatVectorValues, fieldInfo.getVectorDimension(), @@ -143,11 +140,6 @@ CentroidOffsetAndLength buildAndWritePostingsLists( onHeapQuantizedVectors.reset(centroid, size, ord -> cluster[clusterOrds[ord]]); byte encoding = idsWriter.calculateBlockEncoding(i -> docDeltas[i], size, ES91OSQVectorsScorer.BULK_SIZE); postingsOutput.writeByte(encoding); - // TODO we might want to consider putting the docIds in a separate file - // to aid with only having to fetch vectors from slower storage when they are required - // keeping them in the same file indicates we pull the entire file into cache - // idsWriter.writeDocIds(i -> docDeltas[i], size, postingsOutput); - // write vectors bulkWriter.writeVectors(onHeapQuantizedVectors, i -> { // for vector i we write `bulk` size docs or the remaining docs idsWriter.writeDocIds(d -> docDeltas[i + d], Math.min(ES91OSQVectorsScorer.BULK_SIZE, size - i), encoding, postingsOutput); @@ -298,10 +290,6 @@ CentroidOffsetAndLength buildAndWritePostingsLists( byte encoding = idsWriter.calculateBlockEncoding(i -> docDeltas[i], size, ES91OSQVectorsScorer.BULK_SIZE); postingsOutput.writeByte(encoding); offHeapQuantizedVectors.reset(size, ord -> isOverspill[clusterOrds[ord]], ord -> cluster[clusterOrds[ord]]); - // TODO we might want to consider putting the docIds in a separate file - // to aid with only having to fetch vectors from slower storage when they are required - // keeping them in the same file indicates we pull the entire file into cache - // idsWriter.writeDocIds(i -> docDeltas[i], size, postingsOutput); // write vectors bulkWriter.writeVectors(offHeapQuantizedVectors, i -> { // for vector i we write `bulk` size docs or the remaining docs @@ -313,7 +301,6 @@ CentroidOffsetAndLength buildAndWritePostingsLists( ); }); lengths.add(postingsOutput.getFilePointer() - fileOffset - offset); - // lengths.add(1); } if (logger.isDebugEnabled()) {
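
---

Note on the scheme this series lands on: rather than writing one up-front run
of delta-encoded doc ids per posting list (and sizing the reader's scratch
array to maxPostingListSize), the writer emits a single encoding byte per
posting list and then interleaves each BULK_SIZE block of delta-encoded doc
ids with the quantized vectors of that block. The reader decodes one block at
a time into a BULK_SIZE scratch array, reconstituting absolute ids from a
running docBase. Below is a minimal sketch of that round trip, with plain
vints standing in for every block encoding (the real IdsWriter chooses an
encoding via calculateBlockEncoding and passes it to readInts); the class and
helper names here are illustrative, not the ES91 API:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInput;
    import java.io.DataInputStream;
    import java.io.DataOutput;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import java.util.Arrays;

    public final class BlockDocIdSketch {
        static final int BULK_SIZE = 16; // stands in for ES91OSQVectorsScorer.BULK_SIZE

        // Writer side: one running delta stream, emitted BULK_SIZE ids at a time so
        // each block of ids sits directly in front of its block of vector data.
        static byte[] encode(int[] docIds) throws IOException {
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            DataOutputStream out = new DataOutputStream(bytes);
            out.writeInt(docIds.length);
            int prev = 0;
            for (int i = 0; i < docIds.length; i += BULK_SIZE) {
                int count = Math.min(BULK_SIZE, docIds.length - i);
                for (int j = 0; j < count; j++) {
                    int doc = docIds[i + j];
                    writeVInt(out, doc - prev); // deltas of a sorted id list are >= 0
                    prev = doc;
                }
                // ... the quantized vectors for this block would be written here ...
            }
            return bytes.toByteArray();
        }

        // Reader side: decode one block at a time, mirroring readDocIds(count) above.
        static int[] decode(byte[] data) throws IOException {
            DataInputStream in = new DataInputStream(new ByteArrayInputStream(data));
            int n = in.readInt();
            int[] docIds = new int[n];
            int docBase = 0;
            for (int i = 0; i < n; i += BULK_SIZE) {
                int count = Math.min(BULK_SIZE, n - i);
                for (int j = 0; j < count; j++) {
                    docBase += readVInt(in); // reconstitute from the deltas
                    docIds[i + j] = docBase;
                }
                // ... the vectors for this block would be scored or skipped here ...
            }
            return docIds;
        }

        static void writeVInt(DataOutput out, int v) throws IOException {
            while ((v & ~0x7F) != 0) {
                out.writeByte((v & 0x7F) | 0x80);
                v >>>= 7;
            }
            out.writeByte(v);
        }

        static int readVInt(DataInput in) throws IOException {
            int v = 0;
            int shift = 0;
            int b;
            do {
                b = in.readByte() & 0xFF;
                v |= (b & 0x7F) << shift;
                shift += 7;
            } while ((b & 0x80) != 0);
            return v;
        }

        public static void main(String[] args) throws IOException {
            int[] ids = { 3, 7, 8, 20, 21, 40, 41, 42, 100, 101, 102, 103, 200, 201, 202, 203, 500, 501 };
            System.out.println(Arrays.equals(ids, decode(encode(ids)))); // prints true
        }
    }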
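The per-posting-list encoding byte can be read as "how many bits the widest
delta in the list needs". A hypothetical analogue of calculateBlockEncoding
(the real method also sees the block size and may choose among several
encodings, so this is only a sketch of the idea):

    // Hypothetical: pick one fixed bit width for all blocks of a posting list.
    static byte bitsRequired(int[] deltas, int count) {
        int max = 0;
        for (int i = 0; i < count; i++) {
            max |= deltas[i]; // deltas are non-negative
        }
        // bits needed for the largest delta; at least 1 even for all-zero input
        return (byte) (32 - Integer.numberOfLeadingZeros(max | 1));
    }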
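Two consequences of the interleaving are visible in the reader diffs: the
scratch array shrinks from maxPostingListSize ints to BULK_SIZE, and
docToBulkScore may keep overwriting filtered-out entries with -1 in that
shared scratch because each block is freshly decoded before it is scored.
When acceptDocs rejects a whole block, its ids have already been consumed, so
the reader can simply skipBytes over the block's vector bytes without losing
its position in the postings stream.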