Skip to content

Commit 77775a9

Browse files
Adding 3-ary LongHeap to speed up collectors
1 parent 839425e commit 77775a9

File tree

2 files changed

+74
-26
lines changed

2 files changed

+74
-26
lines changed

lucene/core/src/java/org/apache/lucene/util/LongHeap.java

Lines changed: 66 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
/**
2222
* A min heap that stores longs; a primitive priority queue that like all priority queues maintains
2323
* a partial ordering of its elements such that the least element can always be found in constant
24-
* time. Put()'s and pop()'s require log(size). This heap provides unbounded growth via {@link
24+
* time. push() and pop() take log_d(size) time, where d is the arity. This heap provides unbounded growth via {@link
2525
* #push(long)}, and bounded-size insertion based on its nominal maxSize via {@link
2626
* #insertWithOverflow(long)}. The heap is a min heap, meaning that the top element is the lowest
2727
* value of the heap.
@@ -31,6 +31,7 @@
3131
public final class LongHeap {
3232

3333
private final int maxSize;
34+
private final int d; // branching factor (arity)
3435

3536
private long[] heap;
3637
private int size = 0;
@@ -42,7 +43,19 @@ public final class LongHeap {
4243
* @param initialValue the value to fill the heap with.
4344
*/
4445
public LongHeap(int size, long initialValue) {
45-
this(size);
46+
this(size, initialValue, 3);
47+
}
48+
49+
/**
50+
* Constructs a heap with specified size, initializes all elements with the given value, and
51+
* allows configuring the arity (branching factor).
52+
*
53+
* @param size the number of elements to initialize in the heap.
54+
* @param initialValue the value to fill the heap with.
55+
* @param arity branching factor (>=2).
56+
*/
57+
public LongHeap(int size, long initialValue, int arity) {
58+
this(size <= 0 ? 1 : size, arity);
4659
Arrays.fill(heap, 1, size + 1, initialValue);
4760
this.size = size;
4861
}
@@ -54,18 +67,36 @@ public LongHeap(int size, long initialValue) {
5467
* heap
5568
*/
5669
public LongHeap(int maxSize) {
57-
final int heapSize;
70+
this(maxSize, 3);
71+
}
72+
73+
/**
74+
* Create an empty priority queue with configurable arity.
75+
*
76+
* @param maxSize the maximum size of the heap; must be {@code > 0} and {@code < ArrayUtil.MAX_ARRAY_LENGTH}
77+
* @param arity branching factor (>=2)
78+
*/
79+
public LongHeap(int maxSize, int arity) {
5880
if (maxSize < 1 || maxSize >= ArrayUtil.MAX_ARRAY_LENGTH) {
5981
// Throw exception to prevent confusing OOME:
6082
throw new IllegalArgumentException(
6183
"maxSize must be > 0 and < " + (ArrayUtil.MAX_ARRAY_LENGTH - 1) + "; got: " + maxSize);
6284
}
85+
if (arity < 2) {
86+
throw new IllegalArgumentException("arity must be >= 2; got: " + arity);
87+
}
6388
// NOTE: we add +1 because all access to heap is 1-based not 0-based. heap[0] is unused.
64-
heapSize = maxSize + 1;
89+
final int heapSize = maxSize + 1;
6590
this.maxSize = maxSize;
91+
this.d = arity;
6692
this.heap = new long[heapSize];
6793
}
6894

95+
/** Returns the arity (branching factor). */
96+
public int arity() {
97+
return d;
98+
}
99+
69100
/**
70101
* Adds a value in log(size) time. Grows unbounded as needed to accommodate new values.
71102
*
@@ -162,33 +193,45 @@ public void clear() {
162193
size = 0;
163194
}
164195

165-
private void upHeap(int origPos) {
166-
int i = origPos;
196+
private void upHeap(int i) {
167197
long value = heap[i]; // save bottom value
168-
int j = i >>> 1;
169-
while (j > 0 && value < heap[j]) {
170-
heap[i] = heap[j]; // shift parents down
171-
i = j;
172-
j = j >>> 1;
198+
// parent index in a 1-based d-ary heap: the children of node p occupy [d * (p - 1) + 2, d * p + 1]
199+
int parent = ((i - 2) / d) + 1;
200+
while (i > 1 && value < heap[parent]) {
201+
heap[i] = heap[parent]; // shift parent down
202+
i = parent;
203+
parent = ((i - 2) / d) + 1;
173204
}
174205
heap[i] = value; // install saved value
175206
}
176207

177208
private void downHeap(int i) {
178209
long value = heap[i]; // save top value
179-
int j = i << 1; // find smaller child
180-
int k = j + 1;
181-
if (k <= size && heap[k] < heap[j]) {
182-
j = k;
183-
}
184-
while (j <= size && heap[j] < value) {
185-
heap[i] = heap[j]; // shift up child
186-
i = j;
187-
j = i << 1;
188-
k = j + 1;
189-
if (k <= size && heap[k] < heap[j]) {
190-
j = k;
210+
while (true) {
211+
// first child of node i in a 1-based d-ary heap; its children occupy [d * (i - 1) + 2, d * i + 1]
212+
int firstChild = d * (i - 1) + 2;
213+
if (firstChild > size) {
214+
break; // i is a leaf
191215
}
216+
int lastChild = Math.min(firstChild + d - 1, size);
217+
218+
// find the smallest child in [firstChild, lastChild]
219+
int best = firstChild;
220+
long bestVal = heap[best];
221+
for (int c = firstChild + 1; c <= lastChild; c++) {
222+
long v = heap[c];
223+
if (v < bestVal) {
224+
bestVal = v;
225+
best = c;
226+
}
227+
}
228+
229+
if (bestVal >= value) {
230+
break;
231+
}
232+
233+
heap[i] = bestVal;
234+
i = best;
192235
}
193236
heap[i] = value; // install saved value
194237
}

lucene/core/src/test/org/apache/lucene/util/TestLongHeap.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,14 @@ public class TestLongHeap extends LuceneTestCase {
2525

2626
private static void checkValidity(LongHeap heap) {
2727
long[] heapArray = heap.getHeapArray();
28-
for (int i = 2; i <= heap.size(); i++) {
29-
int parent = i >>> 1;
30-
assert heapArray[parent] <= heapArray[i];
28+
int d = heap.arity();
29+
int size = heap.size();
30+
for (int parent = 1; parent <= size; parent++) {
31+
int firstChild = d * (parent - 1) + 2;
32+
int lastChild = Math.min(firstChild + d - 1, size);
33+
for (int c = firstChild; c <= lastChild; c++) {
34+
assert heapArray[parent] <= heapArray[c];
35+
}
3136
}
3237
}
3338

0 commit comments

Comments
 (0)