Skip to content

Commit 84b357c

Browse files
authored
Reduce NeighborArray heap memory (#14527)
* use FloatArrayList/IntArrayList to replace float[]/int[] * use getScores(int i) to replace scores() * add more tests * add change log and change the init value * update OnHeapHnswGraph ramBytesUsed method * improve * add MaxSizedIntArrayList * add MaxSizedFloatArrayList * add MaxSizedFloatArrayList * fixed tests * revert
1 parent 6b3c3e4 commit 84b357c

File tree

11 files changed

+929
-54
lines changed

11 files changed

+929
-54
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,8 @@ Optimizations
106106

107107
* GITHUB#14447: Compute the doc range more efficiently when flushing doc block. (Pan Guixin)
108108

109+
* GITHUB#14527: Reduce NeighborArray heap memory. (weizijun)
110+
109111
* GITHUB#14529, GITHUB#14555, GITHUB#14618: Impl intoBitset for IndexedDISI and Docvalues. (Guo Feng)
110112

111113
* GITHUB#14552: Speed up flush of softdelete by intoBitset. (Guo Feng)
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.lucene.internal.hppc;
19+
20+
import static org.apache.lucene.internal.hppc.HashContainers.DEFAULT_EXPECTED_ELEMENTS;
21+
22+
import org.apache.lucene.util.ArrayUtil;
23+
import org.apache.lucene.util.RamUsageEstimator;
24+
25+
/**
26+
* An array-backed list of {@code float} with a maximum size limit.
27+
*
28+
* @lucene.internal
29+
*/
30+
public class MaxSizedFloatArrayList extends FloatArrayList {
31+
private static final long BASE_RAM_BYTES_USED =
32+
RamUsageEstimator.shallowSizeOfInstance(MaxSizedFloatArrayList.class);
33+
34+
final int maxSize;
35+
36+
/** New instance with sane defaults. */
37+
public MaxSizedFloatArrayList(int maxSize) {
38+
this(maxSize, DEFAULT_EXPECTED_ELEMENTS);
39+
}
40+
41+
/**
42+
* New instance with sane defaults.
43+
*
44+
* @param maxSize The maximum size this list can grow to
45+
* @param expectedElements The expected number of elements guaranteed not to cause buffer
46+
* expansion (inclusive).
47+
*/
48+
public MaxSizedFloatArrayList(int maxSize, int expectedElements) {
49+
super(expectedElements);
50+
assert expectedElements <= maxSize
51+
: "expectedElements (" + expectedElements + ") must be <= maxSize (" + maxSize + ")";
52+
this.maxSize = maxSize;
53+
}
54+
55+
/** Creates a new list from the elements of another list in its iteration order. */
56+
public MaxSizedFloatArrayList(MaxSizedFloatArrayList list) {
57+
super(list.size());
58+
this.maxSize = list.maxSize;
59+
addAll(list);
60+
}
61+
62+
@Override
63+
protected void ensureBufferSpace(int expectedAdditions) {
64+
if (elementsCount + expectedAdditions > maxSize) {
65+
throw new IllegalStateException("Cannot grow beyond maxSize: " + maxSize);
66+
}
67+
if (elementsCount + expectedAdditions > buffer.length) {
68+
this.buffer = ArrayUtil.growInRange(buffer, elementsCount + expectedAdditions, maxSize);
69+
}
70+
}
71+
72+
@Override
73+
public int hashCode() {
74+
int h = 1, max = elementsCount;
75+
h = 31 * h + maxSize;
76+
for (int i = 0; i < max; i++) {
77+
h = 31 * h + BitMixer.mix(this.buffer[i]);
78+
}
79+
return h;
80+
}
81+
82+
/**
83+
* Returns <code>true</code> only if the other object is an instance of the same class and with
84+
* the same elements and maxSize.
85+
*/
86+
@Override
87+
public boolean equals(Object obj) {
88+
if (this == obj) {
89+
return true;
90+
}
91+
if (obj == null || getClass() != obj.getClass()) {
92+
return false;
93+
}
94+
MaxSizedFloatArrayList other = (MaxSizedFloatArrayList) obj;
95+
return maxSize == other.maxSize && super.equals(obj);
96+
}
97+
98+
@Override
99+
public long ramBytesUsed() {
100+
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(buffer);
101+
}
102+
}
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.lucene.internal.hppc;
19+
20+
import static org.apache.lucene.internal.hppc.HashContainers.DEFAULT_EXPECTED_ELEMENTS;
21+
22+
import org.apache.lucene.util.ArrayUtil;
23+
import org.apache.lucene.util.RamUsageEstimator;
24+
25+
/**
26+
* An array-backed list of {@code int} with a maximum size limit.
27+
*
28+
* @lucene.internal
29+
*/
30+
public class MaxSizedIntArrayList extends IntArrayList {
31+
private static final long BASE_RAM_BYTES_USED =
32+
RamUsageEstimator.shallowSizeOfInstance(MaxSizedIntArrayList.class);
33+
34+
final int maxSize;
35+
36+
/** New instance with sane defaults. */
37+
public MaxSizedIntArrayList(int maxSize) {
38+
this(maxSize, DEFAULT_EXPECTED_ELEMENTS);
39+
}
40+
41+
/**
42+
* New instance with sane defaults.
43+
*
44+
* @param maxSize The maximum size this list can grow to
45+
* @param expectedElements The expected number of elements guaranteed not to cause buffer
46+
* expansion (inclusive).
47+
*/
48+
public MaxSizedIntArrayList(int maxSize, int expectedElements) {
49+
super(expectedElements);
50+
assert expectedElements <= maxSize
51+
: "expectedElements (" + expectedElements + ") must be <= maxSize (" + maxSize + ")";
52+
this.maxSize = maxSize;
53+
}
54+
55+
/** Creates a new list from the elements of another list in its iteration order. */
56+
public MaxSizedIntArrayList(MaxSizedIntArrayList list) {
57+
super(list.size());
58+
this.maxSize = list.maxSize;
59+
addAll(list);
60+
}
61+
62+
@Override
63+
protected void ensureBufferSpace(int expectedAdditions) {
64+
if (elementsCount + expectedAdditions > maxSize) {
65+
throw new IllegalStateException("Cannot grow beyond maxSize: " + maxSize);
66+
}
67+
if (elementsCount + expectedAdditions > buffer.length) {
68+
this.buffer = ArrayUtil.growInRange(buffer, elementsCount + expectedAdditions, maxSize);
69+
}
70+
}
71+
72+
@Override
73+
public int hashCode() {
74+
int h = 1, max = elementsCount;
75+
h = 31 * h + maxSize;
76+
for (int i = 0; i < max; i++) {
77+
h = 31 * h + BitMixer.mix(this.buffer[i]);
78+
}
79+
return h;
80+
}
81+
82+
/**
83+
* Returns <code>true</code> only if the other object is an instance of the same class and with
84+
* the same elements and maxSize.
85+
*/
86+
@Override
87+
public boolean equals(Object obj) {
88+
if (this == obj) {
89+
return true;
90+
}
91+
if (obj == null || getClass() != obj.getClass()) {
92+
return false;
93+
}
94+
MaxSizedIntArrayList other = (MaxSizedIntArrayList) obj;
95+
return maxSize == other.maxSize && super.equals(obj);
96+
}
97+
98+
@Override
99+
public long ramBytesUsed() {
100+
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(buffer);
101+
}
102+
}

lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,30 @@ public static int[] growInRange(int[] array, int minLength, int maxLength) {
346346
return growExact(array, Math.min(maxLength, potentialLength));
347347
}
348348

349+
/**
350+
* Returns an array whose size is at least {@code minLength} but not over {@code maxLength},
351+
* growing exponentially if it needs to grow.
352+
*/
353+
public static float[] growInRange(float[] array, int minLength, int maxLength) {
354+
assert minLength >= 0
355+
: "minLength must be positive (got " + minLength + "): likely integer overflow?";
356+
357+
if (minLength > maxLength) {
358+
throw new IllegalArgumentException(
359+
"requested minimum array length "
360+
+ minLength
361+
+ " is larger than requested maximum array length "
362+
+ maxLength);
363+
}
364+
365+
if (array.length >= minLength) {
366+
return array;
367+
}
368+
369+
int potentialLength = oversize(minLength, Float.BYTES);
370+
return growExact(array, Math.min(maxLength, potentialLength));
371+
}
372+
349373
/**
350374
* Returns an array whose size is at least {@code minSize}, generally over-allocating
351375
* exponentially

lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -362,13 +362,13 @@ private void addDiverseNeighbors(
362362
Lock lock = hnswLock.write(level, nbr);
363363
try {
364364
NeighborArray nbrsOfNbr = getGraph().getNeighbors(level, nbr);
365-
nbrsOfNbr.addAndEnsureDiversity(node, candidates.scores()[i], nbr, scorer);
365+
nbrsOfNbr.addAndEnsureDiversity(node, candidates.getScores(i), nbr, scorer);
366366
} finally {
367367
lock.unlock();
368368
}
369369
} else {
370370
NeighborArray nbrsOfNbr = hnsw.getNeighbors(level, nbr);
371-
nbrsOfNbr.addAndEnsureDiversity(node, candidates.scores()[i], nbr, scorer);
371+
nbrsOfNbr.addAndEnsureDiversity(node, candidates.getScores(i), nbr, scorer);
372372
}
373373
}
374374
}
@@ -389,7 +389,7 @@ private boolean[] selectAndLinkDiverse(
389389
// compare each neighbor (in distance order) against the closer neighbors selected so far,
390390
// only adding it if it is closer to the target than to any of the other selected neighbors
391391
int cNode = candidates.nodes()[i];
392-
float cScore = candidates.scores()[i];
392+
float cScore = candidates.getScores(i);
393393
assert cNode <= hnsw.maxNodeId();
394394
scorer.setScoringOrdinal(cNode);
395395
if (diversityCheck(cScore, neighbors, scorer)) {
@@ -444,6 +444,7 @@ void finish() throws IOException {
444444
// see: https://github.com/apache/lucene/issues/14214
445445
// connectComponents();
446446
frozen = true;
447+
hnsw.finishBuild();
447448
}
448449

449450
@SuppressWarnings("unused")
@@ -538,7 +539,7 @@ private void link(int level, int n0, int n1, float score, FixedBitSet notFullyCo
538539
// must subtract 1 here since the nodes array is one larger than the configured
539540
// max neighbors (M / 2M).
540541
// We should have taken care of this check by searching for not-full nodes
541-
int maxConn = nbr0.nodes().length - 1;
542+
int maxConn = nbr0.maxSize() - 1;
542543
assert notFullyConnected.get(n0);
543544
assert nbr0.size() < maxConn : "node " + n0 + " is full, has " + nbr0.size() + " friends";
544545
nbr0.addOutOfOrder(n1, score);

0 commit comments

Comments
 (0)