From 2b50cf189f3fca17e00d6cb4eec6901c5020c447 Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Tue, 2 Sep 2025 16:46:42 +0100 Subject: [PATCH 1/3] [DiskBBQ] small optimization on how we manage queues in hierarchical centroids --- .../vectors/DefaultIVFVectorsReader.java | 19 +++++++++---------- .../codec/vectors/cluster/NeighborQueue.java | 12 ++++++++++++ 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java index 3999622fdc52e..4740aef5baf26 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java @@ -18,7 +18,7 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.VectorUtil; -import org.apache.lucene.util.hnsw.NeighborQueue; +import org.elasticsearch.index.codec.vectors.cluster.NeighborQueue; import org.elasticsearch.index.codec.vectors.reflect.OffHeapStats; import org.elasticsearch.simdvec.ES91OSQVectorsScorer; import org.elasticsearch.simdvec.ES92Int7VectorsScorer; @@ -243,22 +243,19 @@ public boolean hasNext() { @Override public CentroidOffsetAndLength nextPostingListOffsetAndLength() throws IOException { - int centroidOrdinal = neighborQueue.pop(); - updateQueue(); // add one children if available so the queue remains fully populated + int centroidOrdinal = nextCentroid(); centroids.seek(childrenFileOffsets + (long) Long.BYTES * 2 * centroidOrdinal); long postingListOffset = centroids.readLong(); long postingListLength = centroids.readLong(); return new CentroidOffsetAndLength(postingListOffset, postingListLength); } - private void updateQueue() throws IOException { + private int nextCentroid() throws IOException { if (currentParentQueue.size() > 0) { - // add a children from the 
current parent queue - float score = currentParentQueue.topScore(); - int children = currentParentQueue.pop(); - neighborQueue.add(children, score); + // return next centroid and add a children from the current parent queue + return neighborQueue.popAndAddRaw(currentParentQueue.popRaw()); } else if (parentsQueue.size() > 0) { - // add a new parent from the current parent queue + // current parent queue is empty, populate it again with the next parent int pop = parentsQueue.pop(); populateOneChildrenGroup( currentParentQueue, @@ -273,7 +270,9 @@ private void updateQueue() throws IOException { globalCentroidDp, scores ); - updateQueue(); + return nextCentroid(); + } else { + return neighborQueue.pop(); } } }; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/NeighborQueue.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/NeighborQueue.java index b0e261bb3aefb..f420f4d1cb3c7 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/NeighborQueue.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/NeighborQueue.java @@ -137,6 +137,18 @@ public int pop() { return decodeNodeId(heap.pop()); } + /** Removes the top element and returns it */ + public long popRaw() { + return heap.pop(); + } + + /** Removes the top element, returns it and adds the new encoded element */ + public int popAndAddRaw(long raw) { + long top = heap.top(); + heap.updateTop(raw); + return decodeNodeId(top); + } + public void clear() { heap.clear(); } From 102b74afd413601249b09cc8605976920c9de21a Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Wed, 3 Sep 2025 08:01:34 +0100 Subject: [PATCH 2/3] Use the new value if it is the most competitive. 
--- .../index/codec/vectors/cluster/NeighborQueue.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/NeighborQueue.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/NeighborQueue.java index f420f4d1cb3c7..737a7b078024a 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/NeighborQueue.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/NeighborQueue.java @@ -142,9 +142,16 @@ public long popRaw() { return heap.pop(); } - /** Removes the top element, returns it and adds the new encoded element */ + /** + * If the new element would be the new top, returns its node id and leaves the queue + * unchanged. Otherwise, removes the current top element, returns its node id and adds + * the new element to the queue. + */ public int popAndAddRaw(long raw) { long top = heap.top(); + if (raw < top) { + return decodeNodeId(raw); + } heap.updateTop(raw); return decodeNodeId(top); } From 3e208cb9cc877f18a72744804118024bb17e89bb Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Wed, 3 Sep 2025 09:09:03 +0100 Subject: [PATCH 3/3] iter --- .../index/codec/vectors/DefaultIVFVectorsReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java index 4740aef5baf26..a38122596a9a8 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java @@ -252,7 +252,7 @@ public CentroidOffsetAndLength nextPostingListOffsetAndLength() throws IOExcepti private int nextCentroid() throws IOException { if (currentParentQueue.size() > 0) { - // return next centroid and add a children from the current parent queue + // return next centroid and maybe 
add a children from the current parent queue return neighborQueue.popAndAddRaw(currentParentQueue.popRaw()); } else if (parentsQueue.size() > 0) { // current parent queue is empty, populate it again with the next parent