Skip to content

Commit a49908f

Browse files
committed
Introduce growInRange to reduce array overallocation
In cases where we know there is an upper limit to the potential size of an array, we can use `growInRange` to avoid allocating beyond that limit. We address such cases in `DirectoryTaxonomyReader` and `NeighborArray`.
1 parent 74fe7f7 commit a49908f

File tree

6 files changed

+100
-20
lines changed

6 files changed

+100
-20
lines changed

lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,29 @@ public static int[] growExact(int[] array, int newLength) {
330330
return copy;
331331
}
332332

333+
/**
334+
* Returns an array whose size is at least {@code minLength}, generally over-allocating
335+
* exponentially, but never allocating more than {@code maxLength} elements.
336+
*/
337+
public static int[] growInRange(int[] array, int minLength, int maxLength) {
338+
if (array.length >= minLength) {
339+
return array;
340+
}
341+
if (minLength > maxLength) {
342+
throw new IllegalArgumentException(
343+
"requested minimum array length "
344+
+ minLength
345+
+ " is larger than requested maximum array length "
346+
+ maxLength);
347+
}
348+
349+
int potentialLength = oversize(minLength, Integer.BYTES);
350+
if (potentialLength > maxLength) {
351+
return growExact(array, maxLength);
352+
}
353+
return growExact(array, potentialLength);
354+
}
355+
333356
/**
334357
* Returns an array whose size is at least {@code minSize}, generally over-allocating
335358
* exponentially

lucene/core/src/java/org/apache/lucene/util/hnsw/NeighborArray.java

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@
2121
import java.util.Arrays;
2222
import java.util.concurrent.locks.ReadWriteLock;
2323
import java.util.concurrent.locks.ReentrantReadWriteLock;
24+
import org.apache.lucene.util.Accountable;
2425
import org.apache.lucene.util.ArrayUtil;
26+
import org.apache.lucene.util.RamUsageEstimator;
2527

2628
/**
2729
* NeighborArray encodes the neighbors of a node and their mutual scores in the HNSW graph as a pair
@@ -31,18 +33,21 @@
3133
*
3234
* @lucene.internal
3335
*/
34-
public class NeighborArray {
36+
public class NeighborArray implements Accountable {
37+
private static final int INITIAL_CAPACITY = 10;
3538
private final boolean scoresDescOrder;
39+
private final int maxSize;
3640
private int size;
3741
float[] score;
3842
int[] node;
3943
private int sortedNodeSize;
4044
public final ReadWriteLock rwlock = new ReentrantReadWriteLock(true);
4145

4246
public NeighborArray(int maxSize, boolean descOrder) {
43-
node = new int[maxSize];
44-
score = new float[maxSize];
47+
node = new int[INITIAL_CAPACITY];
48+
score = new float[INITIAL_CAPACITY];
4549
this.scoresDescOrder = descOrder;
50+
this.maxSize = maxSize;
4651
}
4752

4853
/**
@@ -52,7 +57,7 @@ public NeighborArray(int maxSize, boolean descOrder) {
5257
public void addInOrder(int newNode, float newScore) {
5358
assert size == sortedNodeSize : "cannot call addInOrder after addOutOfOrder";
5459
if (size == node.length) {
55-
node = ArrayUtil.grow(node);
60+
node = ArrayUtil.growInRange(node, size + 1, maxSize);
5661
score = ArrayUtil.growExact(score, node.length);
5762
}
5863
if (size > 0) {
@@ -73,7 +78,7 @@ public void addInOrder(int newNode, float newScore) {
7378
/** Add node and newScore but do not insert as sorted */
7479
public void addOutOfOrder(int newNode, float newScore) {
7580
if (size == node.length) {
76-
node = ArrayUtil.grow(node);
81+
node = ArrayUtil.growInRange(node, size + 1, maxSize);
7782
score = ArrayUtil.growExact(score, node.length);
7883
}
7984

@@ -206,4 +211,12 @@ private int descSortFindRightMostInsertionPoint(float newScore, int bound) {
206211
}
207212
return start;
208213
}
214+
215+
@Override
216+
public long ramBytesUsed() {
217+
return (long) node.length * (Integer.BYTES + Float.BYTES)
218+
+ RamUsageEstimator.NUM_BYTES_ARRAY_HEADER * 2L
219+
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF * 2L
220+
+ Integer.BYTES * 5;
221+
}
209222
}

lucene/core/src/java/org/apache/lucene/util/hnsw/OnHeapHnswGraph.java

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -275,21 +275,15 @@ private void generateLevelToNodes() {
275275

276276
@Override
277277
public long ramBytesUsed() {
278-
long neighborArrayBytes0 =
279-
(long) nsize0 * (Integer.BYTES + Float.BYTES)
280-
+ RamUsageEstimator.NUM_BYTES_ARRAY_HEADER * 2L
281-
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF * 2L
282-
+ Integer.BYTES * 3;
283-
long neighborArrayBytes =
284-
(long) nsize * (Integer.BYTES + Float.BYTES)
285-
+ RamUsageEstimator.NUM_BYTES_ARRAY_HEADER * 2L
286-
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF * 2L
287-
+ Integer.BYTES * 3;
288278
long total = 0;
289-
total +=
290-
size() * (neighborArrayBytes0 + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER)
291-
+ RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; // for graph and level 0;
292-
total += nonZeroLevelSize.get() * neighborArrayBytes; // for non-zero level
279+
for (NeighborArray[] neighborArraysPerNode : graph) {
280+
if (neighborArraysPerNode != null) {
281+
for (NeighborArray neighborArrayPerNodeAndLevel : neighborArraysPerNode) {
282+
total += neighborArrayPerNodeAndLevel.ramBytesUsed();
283+
}
284+
}
285+
}
286+
293287
total += 4 * Integer.BYTES; // all int fields
294288
total += 1; // field: noGrowth
295289
total +=

lucene/core/src/test/org/apache/lucene/util/TestArrayUtil.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818

1919
import static org.apache.lucene.util.ArrayUtil.copyOfSubArray;
2020
import static org.apache.lucene.util.ArrayUtil.growExact;
21+
import static org.apache.lucene.util.ArrayUtil.growInRange;
22+
import static org.apache.lucene.util.ArrayUtil.oversize;
2123

2224
import java.util.Arrays;
2325
import java.util.Collections;
@@ -371,6 +373,33 @@ public void testGrowExact() {
371373
() -> growExact(new String[] {"a", "b", "c"}, random().nextInt(3)));
372374
}
373375

376+
public void testGrowInRange() {
377+
int[] array = new int[] {1, 2, 3};
378+
379+
// maxLength does not matter as long as minLength is enough
380+
assertSame(array, growInRange(array, 1, 4));
381+
assertSame(array, growInRange(array, 1, 4));
382+
assertSame(array, growInRange(array, 1, 2));
383+
assertSame(array, growInRange(array, 1, 1));
384+
assertSame(array, growInRange(array, 1, 0));
385+
assertSame(array, growInRange(array, 1, -1));
386+
387+
// maxLength < minLength if the array has to be grown throws an exception
388+
expectThrows(IllegalArgumentException.class, () -> growInRange(array, 4, 3));
389+
expectThrows(IllegalArgumentException.class, () -> growInRange(array, 5, 4));
390+
391+
int minLength = 4;
392+
int maxLength = Integer.MAX_VALUE;
393+
394+
// The array grows normally if maxLength permits
395+
assertEquals(
396+
oversize(minLength, Integer.BYTES),
397+
growInRange(new int[] {1, 2, 3}, minLength, maxLength).length);
398+
399+
// The array grows to maxLength if maxLength is limiting
400+
assertEquals(minLength, growInRange(new int[] {1, 2, 3}, minLength, minLength).length);
401+
}
402+
374403
public void testCopyOfSubArray() {
375404
short[] shortArray = {1, 2, 3};
376405
assertArrayEquals(new short[] {1}, copyOfSubArray(shortArray, 0, 1));

lucene/core/src/test/org/apache/lucene/util/hnsw/TestNeighborArray.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,12 @@
1717

1818
package org.apache.lucene.util.hnsw;
1919

20+
import static org.apache.lucene.tests.util.RamUsageTester.ramUsed;
21+
2022
import java.io.IOException;
2123
import org.apache.lucene.tests.util.LuceneTestCase;
2224
import org.apache.lucene.util.Bits;
25+
import org.apache.lucene.util.RamUsageEstimator;
2326

2427
public class TestNeighborArray extends LuceneTestCase {
2528

@@ -241,4 +244,21 @@ default Bits getAcceptOrds(Bits acceptDocs) {
241244
throw new UnsupportedOperationException();
242245
}
243246
}
247+
248+
public void testRamUsageEstimate() {
249+
int maxSize = random().nextInt(100, 10_000);
250+
int numAdditions = random().nextInt(100, 1_000);
251+
NeighborArray neighbors = new NeighborArray(maxSize, true);
252+
253+
int count = 0;
254+
while (count < maxSize) {
255+
for (int i = 0; i < numAdditions && count < maxSize; i++) {
256+
neighbors.addInOrder(count, 0f);
257+
count++;
258+
}
259+
long estimated = RamUsageEstimator.sizeOfObject(neighbors);
260+
long actual = ramUsed(neighbors);
261+
assertEquals((double) actual, (double) estimated, (double) actual * 0.3);
262+
}
263+
}
244264
}

lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,8 @@ public int[] getBulkOrdinals(FacetLabel... categoryPaths) throws IOException {
351351
}
352352
} else {
353353
indexesMissingFromCache =
354-
ArrayUtil.grow(indexesMissingFromCache, numberOfMissingFromCache + 1);
354+
ArrayUtil.growInRange(
355+
indexesMissingFromCache, numberOfMissingFromCache + 1, categoryPaths.length);
355356
indexesMissingFromCache[numberOfMissingFromCache++] = i;
356357
}
357358
}

0 commit comments

Comments
 (0)