Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,8 @@ Improvements

* GITHUB#15332: Add PhraseQuery.Builder.setMaxTerms() method to limit the maximum number of terms and excessive memory use (linyunanit)

* GITHUB#15425: Refactoring internal HnswGraph.NodesIterator to avoid unneeded copying and sorting (Mike Sokolov)

Optimizations
---------------------
* GITHUB#15140: Optimize TopScoreDocCollector with TernaryLongHeap for improved performance over Binary-LongHeap. (Ramakrishna Chilaka)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -575,9 +575,9 @@ public int entryNode() {
@Override
public NodesIterator getNodesOnLevel(int level) {
if (level == 0) {
return new ArrayNodesIterator(size());
return new DenseNodesIterator(size());
} else {
return new ArrayNodesIterator(nodesByLevel[level], nodesByLevel[level].length);
return new ArrayNodesIterator(nodesByLevel[level]);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ public int entryNode() {
@Override
public NodesIterator getNodesOnLevel(int level) {
if (level == 0) {
return new ArrayNodesIterator(size());
return new DenseNodesIterator(size());
} else {
return new ArrayNodesIterator(nodesByLevel.get(level), graph.get(level).size());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -485,9 +485,9 @@ public int entryNode() {
@Override
public NodesIterator getNodesOnLevel(int level) {
if (level == 0) {
return new ArrayNodesIterator(size());
return new DenseNodesIterator(size());
} else {
return new ArrayNodesIterator(nodesByLevel[level], nodesByLevel[level].length);
return new ArrayNodesIterator(nodesByLevel[level]);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -537,9 +537,9 @@ public int maxConn() {
@Override
public NodesIterator getNodesOnLevel(int level) {
if (level == 0) {
return new ArrayNodesIterator(size());
return new DenseNodesIterator(size());
} else {
return new ArrayNodesIterator(nodesByLevel[level], nodesByLevel[level].length);
return new ArrayNodesIterator(nodesByLevel[level]);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -566,9 +566,9 @@ public int maxConn() {
@Override
public NodesIterator getNodesOnLevel(int level) {
if (level == 0) {
return new ArrayNodesIterator(size());
return new DenseNodesIterator(size());
} else {
return new ArrayNodesIterator(nodesByLevel[level], nodesByLevel[level].length);
return new ArrayNodesIterator(nodesByLevel[level]);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,11 +230,11 @@ private void writeMeta(
} else {
meta.writeInt(graph.numLevels());
for (int level = 0; level < graph.numLevels(); level++) {
int[] sortedNodes = HnswGraph.NodesIterator.getSortedNodes(graph.getNodesOnLevel(level));
meta.writeInt(sortedNodes.length); // number of nodes on a level
HnswGraph.NodesIterator sortedNodes = graph.getSortedNodes(level);
meta.writeInt(sortedNodes.size()); // number of nodes on a level
if (level > 0) {
for (int node : sortedNodes) {
meta.writeInt(node); // list of nodes on a level
while (sortedNodes.hasNext()) {
meta.writeInt(sortedNodes.next()); // list of nodes on a level
}
}
}
Expand All @@ -259,9 +259,9 @@ private Lucene91OnHeapHnswGraph writeGraph(
// write vectors' neighbours on each level into the vectorIndex file
int countOnLevel0 = graph.size();
for (int level = 0; level < graph.numLevels(); level++) {
int[] sortedNodes = HnswGraph.NodesIterator.getSortedNodes(graph.getNodesOnLevel(level));
for (int node : sortedNodes) {
Lucene91NeighborArray neighbors = graph.getNeighbors(level, node);
HnswGraph.NodesIterator sortedNodes = graph.getSortedNodes(level);
while (sortedNodes.hasNext()) {
Lucene91NeighborArray neighbors = graph.getNeighbors(level, sortedNodes.next());
int size = neighbors.size();
vectorIndex.writeInt(size);
// Destructively modify; it's ok we are discarding it after this
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,11 +267,11 @@ private void writeMeta(
} else {
meta.writeInt(graph.numLevels());
for (int level = 0; level < graph.numLevels(); level++) {
int[] sortedNodes = HnswGraph.NodesIterator.getSortedNodes(graph.getNodesOnLevel(level));
meta.writeInt(sortedNodes.length); // number of nodes on a level
HnswGraph.NodesIterator sortedNodes = graph.getSortedNodes(level);
meta.writeInt(sortedNodes.size()); // number of nodes on a level
if (level > 0) {
for (int node : sortedNodes) {
meta.writeInt(node); // list of nodes on a level
while (sortedNodes.hasNext()) {
meta.writeInt(sortedNodes.next()); // list of nodes on a level
}
}
}
Expand All @@ -295,9 +295,9 @@ private OnHeapHnswGraph writeGraph(
int countOnLevel0 = graph.size();
for (int level = 0; level < graph.numLevels(); level++) {
int maxConnOnLevel = level == 0 ? (M * 2) : M;
int[] sortedNodes = HnswGraph.NodesIterator.getSortedNodes(graph.getNodesOnLevel(level));
for (int node : sortedNodes) {
NeighborArray neighbors = graph.getNeighbors(level, node);
HnswGraph.NodesIterator sortedNodes = graph.getSortedNodes(level);
while (sortedNodes.hasNext()) {
NeighborArray neighbors = graph.getNeighbors(level, sortedNodes.next());
int size = neighbors.size();
vectorIndex.writeInt(size);
// Destructively modify; it's ok we are discarding it after this
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ public NodesIterator getNodesOnLevel(int level) {
if (level == 0) {
return graph.getNodesOnLevel(0);
} else {
return new ArrayNodesIterator(nodesByLevel.get(level), nodesByLevel.get(level).length);
return new ArrayNodesIterator(nodesByLevel.get(level));
}
}
};
Expand Down Expand Up @@ -492,9 +492,9 @@ private void writeGraph(OnHeapHnswGraph graph) throws IOException {
int countOnLevel0 = graph.size();
for (int level = 0; level < graph.numLevels(); level++) {
int maxConnOnLevel = level == 0 ? (M * 2) : M;
int[] sortedNodes = HnswGraph.NodesIterator.getSortedNodes(graph.getNodesOnLevel(level));
for (int node : sortedNodes) {
NeighborArray neighbors = graph.getNeighbors(level, node);
HnswGraph.NodesIterator sortedNodes = graph.getSortedNodes(level);
while (sortedNodes.hasNext()) {
NeighborArray neighbors = graph.getNeighbors(level, sortedNodes.next());
int size = neighbors.size();
vectorIndex.writeInt(size);
// Destructively modify; it's ok we are discarding it after this
Expand Down Expand Up @@ -578,11 +578,11 @@ private void writeMeta(
} else {
meta.writeInt(graph.numLevels());
for (int level = 0; level < graph.numLevels(); level++) {
int[] sortedNodes = HnswGraph.NodesIterator.getSortedNodes(graph.getNodesOnLevel(level));
meta.writeInt(sortedNodes.length); // number of nodes on a level
HnswGraph.NodesIterator sortedNodes = graph.getSortedNodes(level);
meta.writeInt(sortedNodes.size()); // number of nodes on a level
if (level > 0) {
for (int node : sortedNodes) {
meta.writeInt(node); // list of nodes on a level
while (sortedNodes.hasNext()) {
meta.writeInt(sortedNodes.next()); // list of nodes on a level
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ public NodesIterator getNodesOnLevel(int level) {
if (level == 0) {
return graph.getNodesOnLevel(0);
} else {
return new ArrayNodesIterator(nodesByLevel.get(level), nodesByLevel.get(level).length);
return new ArrayNodesIterator(nodesByLevel.get(level));
}
}
};
Expand Down Expand Up @@ -539,11 +539,11 @@ private int[][] writeGraph(OnHeapHnswGraph graph) throws IOException {
int countOnLevel0 = graph.size();
int[][] offsets = new int[graph.numLevels()][];
for (int level = 0; level < graph.numLevels(); level++) {
int[] sortedNodes = HnswGraph.NodesIterator.getSortedNodes(graph.getNodesOnLevel(level));
offsets[level] = new int[sortedNodes.length];
HnswGraph.NodesIterator sortedNodes = graph.getSortedNodes(level);
offsets[level] = new int[sortedNodes.size()];
int nodeOffsetId = 0;
for (int node : sortedNodes) {
NeighborArray neighbors = graph.getNeighbors(level, node);
while (sortedNodes.hasNext()) {
NeighborArray neighbors = graph.getNeighbors(level, sortedNodes.next());
int size = neighbors.size();
// Write size in VInt as the neighbors list is typically small
long offsetStart = vectorIndex.getFilePointer();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -615,9 +615,9 @@ public int entryNode() throws IOException {
@Override
public NodesIterator getNodesOnLevel(int level) {
if (level == 0) {
return new ArrayNodesIterator(size());
return new DenseNodesIterator(size());
} else {
return new ArrayNodesIterator(nodesByLevel[level], nodesByLevel[level].length);
return new ArrayNodesIterator(nodesByLevel[level]);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ public NodesIterator getNodesOnLevel(int level) {
if (level == 0) {
return graph.getNodesOnLevel(0);
} else {
return new ArrayNodesIterator(nodesByLevel.get(level), nodesByLevel.get(level).length);
return new ArrayNodesIterator(nodesByLevel.get(level));
}
}
};
Expand Down Expand Up @@ -409,7 +409,6 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE
// build the graph using the temporary vector data
// we use Lucene99HnswVectorsReader.DenseOffHeapVectorValues for the graph construction
// doesn't need to know docIds
// TODO: separate random access vector values from DocIdSetIterator?
OnHeapHnswGraph graph = null;
int[][] vectorIndexNodeOffsets = null;
// Check if we should bypass graph building for tiny segments
Expand Down Expand Up @@ -475,11 +474,11 @@ private int[][] writeGraph(OnHeapHnswGraph graph) throws IOException {
int[][] offsets = new int[graph.numLevels()][];
int[] scratch = new int[graph.maxConn() * 2];
for (int level = 0; level < graph.numLevels(); level++) {
int[] sortedNodes = NodesIterator.getSortedNodes(graph.getNodesOnLevel(level));
offsets[level] = new int[sortedNodes.length];
NodesIterator sortedNodes = graph.getSortedNodes(level);
offsets[level] = new int[sortedNodes.size()];
int nodeOffsetId = 0;
for (int node : sortedNodes) {
NeighborArray neighbors = graph.getNeighbors(level, node);
while (sortedNodes.hasNext()) {
NeighborArray neighbors = graph.getNeighbors(level, sortedNodes.next());
int size = neighbors.size();
// Write size in VInt as the neighbors list is typically small
long offsetStart = vectorIndex.getFilePointer();
Expand Down
85 changes: 56 additions & 29 deletions lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,23 @@ public int maxConn() {

@Override
public NodesIterator getNodesOnLevel(int level) {
return ArrayNodesIterator.EMPTY;
return DenseNodesIterator.EMPTY;
}
};

/**
 * Returns an iterator over the nodes on the given level in ascending node-id order.
 *
 * <p>Level 0 is served by a {@link DenseNodesIterator} over {@code [0, size())}, so nothing is
 * copied or sorted. For any other level, the nodes reported by {@link #getNodesOnLevel(int)} are
 * copied into a fresh array, sorted, and wrapped in an {@link ArrayNodesIterator}.
 *
 * @param level the graph level whose nodes are requested
 * @return an iterator yielding the level's nodes in increasing order
 * @throws IOException declared on this method's signature; presumably raised by the underlying
 *     graph access (confirm against {@link #getNodesOnLevel(int)})
 */
public NodesIterator getSortedNodes(int level) throws IOException {
  if (level == 0) {
    // Level 0 is enumerated densely, which is already in ascending order.
    return new DenseNodesIterator(size());
  }
  NodesIterator unordered = getNodesOnLevel(level);
  int[] ordered = new int[unordered.size()];
  int filled = 0;
  while (unordered.hasNext()) {
    ordered[filled] = unordered.nextInt();
    filled++;
  }
  // getNodesOnLevel gives no ordering guarantee, so sort the copy before exposing it.
  Arrays.sort(ordered);
  return new ArrayNodesIterator(ordered);
}

/**
* Iterator over the graph nodes on a certain level. Iterator also provides the size – the total
* number of nodes to be iterated over. The nodes are NOT guaranteed to be presented in any
Expand All @@ -170,36 +183,59 @@ public int size() {
* @return The number of integers written to `dest`
*/
public abstract int consume(int[] dest);

public static int[] getSortedNodes(NodesIterator nodesOnLevel) {
int[] sortedNodes = new int[nodesOnLevel.size()];
for (int n = 0; nodesOnLevel.hasNext(); n++) {
sortedNodes[n] = nodesOnLevel.nextInt();
}
Arrays.sort(sortedNodes);
return sortedNodes;
}
}

/** NodesIterator that accepts nodes as an integer array. */
public static class ArrayNodesIterator extends NodesIterator {
private static final NodesIterator EMPTY = new ArrayNodesIterator(0);

private final int[] nodes;
private int cur = 0;

/** Constructor for iterator based on integer array representing nodes */
/** Normal constructor */
public ArrayNodesIterator(int[] nodes) {
this(nodes, nodes.length);
}

/** Constructor that allows overriding size, used only for back-compat */
public ArrayNodesIterator(int[] nodes, int size) {
super(size);
assert nodes != null;
assert size <= nodes.length;
this.nodes = nodes;
}

/** Constructor for iterator based on the size */
public ArrayNodesIterator(int size) {
@Override
public int consume(int[] dest) {
if (hasNext() == false) {
throw new NoSuchElementException();
}
int numToCopy = Math.min(size - cur, dest.length);
System.arraycopy(nodes, cur, dest, 0, numToCopy);
cur += numToCopy;
return numToCopy;
}

@Override
public int nextInt() {
if (hasNext() == false) {
throw new NoSuchElementException();
}
return nodes[cur++];
}

@Override
public boolean hasNext() {
return cur < size;
}
}

/** NodesIterator that enumerates [0, size) */
public static class DenseNodesIterator extends NodesIterator {
private static final NodesIterator EMPTY = new DenseNodesIterator(0);

private int cur = 0;

/** Sole constructor */
public DenseNodesIterator(int size) {
super(size);
this.nodes = null;
}

@Override
Expand All @@ -208,14 +244,9 @@ public int consume(int[] dest) {
throw new NoSuchElementException();
}
int numToCopy = Math.min(size - cur, dest.length);
if (nodes == null) {
for (int i = 0; i < numToCopy; i++) {
dest[i] = cur + i;
}
return numToCopy;
for (int i = 0; i < numToCopy; i++) {
dest[i] = cur + i;
}
System.arraycopy(nodes, cur, dest, 0, numToCopy);
cur += numToCopy;
return numToCopy;
}

Expand All @@ -224,11 +255,7 @@ public int nextInt() {
if (hasNext() == false) {
throw new NoSuchElementException();
}
if (nodes == null) {
return cur++;
} else {
return nodes[cur++];
}
return cur++;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ static List<Component> components(
HnswGraph.NodesIterator entryPoints;
// System.out.println("components level=" + level);
if (level == hnsw.numLevels() - 1) {
entryPoints = new HnswGraph.ArrayNodesIterator(new int[] {hnsw.entryNode()}, 1);
entryPoints = new HnswGraph.ArrayNodesIterator(new int[] {hnsw.entryNode()});
} else {
entryPoints = hnsw.getNodesOnLevel(level + 1);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ public NodesIterator getNodesOnLevel(int level) {
"graph build not complete, size=" + size() + " maxNodeId=" + maxNodeId());
}
if (level == 0) {
return new ArrayNodesIterator(size());
return new DenseNodesIterator(size());
} else {
generateLevelToNodes();
return new CollectionNodesIterator(levelToNodes[level]);
Expand Down