Skip to content

Commit 59df9e7

Browse files
Avoid copying bitset that's subsequently cleared (#15582)
* Avoid copying bitset that's subsequently cleared

  In two instances, `FixedBitSet.ensureCapacity()` was called right before the bitset was cleared, which unnecessarily copied the internal array. This PR adds `FixedBitSet.ensureCapacityAndClear()`, which avoids the copy. This should give a small performance improvement.

* Improve javadoc
* Update CHANGES
* Finish the renaming
1 parent 0ef4d97 commit 59df9e7

File tree

4 files changed

+36
-17
lines changed

4 files changed

+36
-17
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ Optimizations
9898

9999
* GITHUB#15085, GITHUB#15092: Hunspell suggestions: Ensure candidate roots are not worse before updating. (Ilia Permiashkin)
100100

101+
* GITHUB#15582: Avoid copying bitset that's subsequently cleared
102+
101103
Bug Fixes
102104
---------------------
103105
* GITHUB#14049: Randomize KNN codec params in RandomCodec. Fixes scalar quantization div-by-zero

lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -553,8 +553,7 @@ public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOExcept
553553
scratch = new FixedBitSet(bitSet.length());
554554
} else {
555555
// It's OK even if bitset.length() == 0 according to the contract.
556-
scratch = FixedBitSet.ensureCapacity(scratch, bitSet.length() - 1);
557-
scratch.clear();
556+
scratch = FixedBitSet.ensureCapacityAndClear(scratch, bitSet.length() - 1);
558557
}
559558

560559
onDiskDocValues.intoBitSet(upTo, scratch, offset);

lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -42,25 +42,42 @@ public final class FixedBitSet extends BitSet {
4242
private final int numBits; // The number of bits in use
4343
private final int numWords; // The exact number of longs needed to hold numBits (<= bits.length)
4444

45-
/**
46-
* If the given {@link FixedBitSet} is large enough to hold {@code numBits+1}, returns the given
47-
* bits, otherwise returns a new {@link FixedBitSet} which can hold {@code numBits+1} bits. That
48-
* means the bitset returned by this method can be safely called with {@code bits.set(numBits)}
49-
*
50-
* <p><b>NOTE:</b> the returned bitset reuses the underlying {@code long[]} of the given {@code
51-
* bits} if possible. Also, calling {@link #length()} on the returned bits may return a value
52-
* greater than {@code numBits+1}.
53-
*/
54-
public static FixedBitSet ensureCapacity(FixedBitSet bits, int numBits) {
55-
if (numBits < bits.numBits) {
45+
/// Ensure the given `bits` can store a value at `desiredBit` index. If the current [#length()] is
46+
/// sufficient, `bits` is simply returned. Otherwise, a new, larger bitset is allocated, with
47+
/// contents of `bits` copied.
48+
///
49+
/// @see #ensureCapacityAndClear(FixedBitSet, int)
50+
public static FixedBitSet ensureCapacity(FixedBitSet bits, int desiredBit) {
51+
return ensureCapacityInternal(bits, desiredBit, true);
52+
}
53+
54+
/// Clear the given `bits` and ensure it can store a value at `desiredBit` index. If the current
55+
/// [#length()] is sufficient, `bits` is simply cleared and returned. Otherwise, a new, larger
56+
/// bitset is allocated.
57+
///
58+
/// @see #ensureCapacity(FixedBitSet, int)
59+
public static FixedBitSet ensureCapacityAndClear(FixedBitSet bits, int desiredBit) {
60+
return ensureCapacityInternal(bits, desiredBit, false);
61+
}
62+
63+
private static FixedBitSet ensureCapacityInternal(
64+
FixedBitSet bits, int desiredBit, boolean preserveData) {
65+
if (desiredBit < bits.numBits) {
66+
if (!preserveData) {
67+
bits.clear();
68+
}
5669
return bits;
5770
} else {
5871
// Depends on the ghost bits being clear!
5972
// (Otherwise, they may become visible in the new instance)
60-
int numWords = bits2words(numBits);
73+
int numWords = bits2words(desiredBit);
6174
long[] arr = bits.getBits();
6275
if (numWords >= arr.length) {
63-
arr = ArrayUtil.grow(arr, numWords + 1);
76+
if (preserveData) {
77+
arr = ArrayUtil.grow(arr, numWords + 1);
78+
} else {
79+
arr = new long[ArrayUtil.oversize(numWords + 1, Long.BYTES)];
80+
}
6481
}
6582
return new FixedBitSet(arr, arr.length << 6);
6683
}

lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -369,9 +369,10 @@ void searchLevel(
369369
private void prepareScratchState(int capacity, int bulkScoreSize) {
370370
candidates.clear();
371371
if (visited.length() < capacity) {
372-
visited = FixedBitSet.ensureCapacity((FixedBitSet) visited, capacity);
372+
visited = FixedBitSet.ensureCapacityAndClear((FixedBitSet) visited, capacity);
373+
} else {
374+
visited.clear();
373375
}
374-
visited.clear();
375376
if (bulkNodes == null || bulkNodes.length < bulkScoreSize) {
376377
bulkNodes = new int[bulkScoreSize];
377378
bulkScores = new float[bulkScoreSize];

0 commit comments

Comments
 (0)