Skip to content

Commit 15d8252

Browse files
viliam-durina authored and
dweiss committed
Avoid copying bitset that's subsequently cleared (#15582)
Sync up changes.txt (10x)
1 parent 62231ea commit 15d8252

File tree

4 files changed

+40
-17
lines changed

4 files changed

+40
-17
lines changed

lucene/CHANGES.txt

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -157,6 +157,10 @@ Optimizations
157157

158158
* GITHUB#15511: Dynamic pruning for SORTED(_SET) fields with doc values skipper (Pan Guixin)
159159

160+
* GITHUB#15560: Avoid unnecessary getGraph() call. (Shubham Chaudhary)
161+
162+
* GITHUB#15582: Avoid copying bitset that's subsequently cleared (Viliam Durina)
163+
160164
Bug Fixes
161165
---------------------
162166
* GITHUB#14161: PointInSetQuery's constructor now throws IllegalArgumentException
@@ -196,6 +200,8 @@ Other
196200
* GITHUB#15481: The `reverse` field of SortField is now final. If you have subclassed SortField,
197201
you should set `reverse` in the super constructor. (Alan Woodward)
198202

203+
* GITHUB#15476: Enforce fallback support for float vector retrieval in quantized KNN vector formats. (Pulkit Gupta)
204+
199205
* GITHUB#15513: Update documentation in DefaultBloomFilterFactory to reflect changes made in GITHUB#11900 (Greg Miller)
200206

201207
* GITHUB#15341: Align float vectors on disk to 64 bytes, for optimal performance on Arm Neoverse

lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -553,8 +553,7 @@ public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOExcept
553553
scratch = new FixedBitSet(bitSet.length());
554554
} else {
555555
// It's OK even if bitset.length() == 0 according to the contract.
556-
scratch = FixedBitSet.ensureCapacity(scratch, bitSet.length() - 1);
557-
scratch.clear();
556+
scratch = FixedBitSet.ensureCapacityAndClear(scratch, bitSet.length() - 1);
558557
}
559558

560559
onDiskDocValues.intoBitSet(upTo, scratch, offset);

lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java

Lines changed: 30 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -43,25 +43,42 @@ public final class FixedBitSet extends BitSet {
4343
private final int numBits; // The number of bits in use
4444
private final int numWords; // The exact number of longs needed to hold numBits (<= bits.length)
4545

46-
/**
47-
* If the given {@link FixedBitSet} is large enough to hold {@code numBits+1}, returns the given
48-
* bits, otherwise returns a new {@link FixedBitSet} which can hold {@code numBits+1} bits. That
49-
* means the bitset returned by this method can be safely called with {@code bits.set(numBits)}
50-
*
51-
* <p><b>NOTE:</b> the returned bitset reuses the underlying {@code long[]} of the given {@code
52-
* bits} if possible. Also, calling {@link #length()} on the returned bits may return a value
53-
* greater than {@code numBits+1}.
54-
*/
55-
public static FixedBitSet ensureCapacity(FixedBitSet bits, int numBits) {
56-
if (numBits < bits.numBits) {
46+
/// Ensure the given `bits` can store a value at `desiredBit` index. If the current [#length()] is
47+
/// sufficient, `bits` is simply returned. Otherwise, a new, larger bitset is allocated, with
48+
/// contents of `bits` copied.
49+
///
50+
/// @see #ensureCapacityAndClear(FixedBitSet, int)
51+
public static FixedBitSet ensureCapacity(FixedBitSet bits, int desiredBit) {
52+
return ensureCapacityInternal(bits, desiredBit, true);
53+
}
54+
55+
/// Clear the given `bits` and ensure it can store a value at `desiredBit` index. If the current
56+
/// [#length()] is sufficient, `bits` is simply cleared and returned. Otherwise, a new, larger
57+
/// bitset is allocated.
58+
///
59+
/// @see #ensureCapacity(FixedBitSet, int)
60+
public static FixedBitSet ensureCapacityAndClear(FixedBitSet bits, int desiredBit) {
61+
return ensureCapacityInternal(bits, desiredBit, false);
62+
}
63+
64+
private static FixedBitSet ensureCapacityInternal(
65+
FixedBitSet bits, int desiredBit, boolean preserveData) {
66+
if (desiredBit < bits.numBits) {
67+
if (!preserveData) {
68+
bits.clear();
69+
}
5770
return bits;
5871
} else {
5972
// Depends on the ghost bits being clear!
6073
// (Otherwise, they may become visible in the new instance)
61-
int numWords = bits2words(numBits);
74+
int numWords = bits2words(desiredBit);
6275
long[] arr = bits.getBits();
6376
if (numWords >= arr.length) {
64-
arr = ArrayUtil.grow(arr, numWords + 1);
77+
if (preserveData) {
78+
arr = ArrayUtil.grow(arr, numWords + 1);
79+
} else {
80+
arr = new long[ArrayUtil.oversize(numWords + 1, Long.BYTES)];
81+
}
6582
}
6683
return new FixedBitSet(arr, arr.length << 6);
6784
}

lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -369,9 +369,10 @@ void searchLevel(
369369
private void prepareScratchState(int capacity, int bulkScoreSize) {
370370
candidates.clear();
371371
if (visited.length() < capacity) {
372-
visited = FixedBitSet.ensureCapacity((FixedBitSet) visited, capacity);
372+
visited = FixedBitSet.ensureCapacityAndClear((FixedBitSet) visited, capacity);
373+
} else {
374+
visited.clear();
373375
}
374-
visited.clear();
375376
if (bulkNodes == null || bulkNodes.length < bulkScoreSize) {
376377
bulkNodes = new int[bulkScoreSize];
377378
bulkScores = new float[bulkScoreSize];

0 commit comments

Comments
 (0)