diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 8c7663743b9c..10fa8eae98cb 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -98,6 +98,8 @@ Optimizations
 * GITHUB#15085, GITHUB#15092: Hunspell suggestions: Ensure candidate roots are not worse before updating.
   (Ilia Permiashkin)
 
+* GITHUB#15582: Avoid copying bitset that's subsequently cleared
+
 Bug Fixes
 ---------------------
 * GITHUB#14049: Randomize KNN codec params in RandomCodec. Fixes scalar quantization div-by-zero
diff --git a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
index 6ea1b2f5a6b1..2f79618e8287 100644
--- a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
@@ -553,8 +553,7 @@ public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOExcept
         scratch = new FixedBitSet(bitSet.length());
       } else {
         // It's OK even if bitset.length() == 0 according the contract.
-        scratch = FixedBitSet.ensureCapacity(scratch, bitSet.length() - 1);
-        scratch.clear();
+        scratch = FixedBitSet.ensureCapacityAndClear(scratch, bitSet.length() - 1);
       }
       onDiskDocValues.intoBitSet(upTo, scratch, offset);
 
diff --git a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
index fbc27aa6f48f..c3dfafb465ac 100644
--- a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
+++ b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
@@ -42,25 +42,52 @@ public final class FixedBitSet extends BitSet {
   private final int numBits; // The number of bits in use
   private final int numWords; // The exact number of longs needed to hold numBits (<= bits.length)
 
-  /**
-   * If the given {@link FixedBitSet} is large enough to hold {@code numBits+1}, returns the given
-   * bits, otherwise returns a new {@link FixedBitSet} which can hold {@code numBits+1} bits. That
-   * means the bitset returned by this method can be safely called with {@code bits.set(numBits)}
-   *
-   * <p><b>NOTE:</b> the returned bitset reuses the underlying {@code long[]} of the given {@code
-   * bits} if possible. Also, calling {@link #length()} on the returned bits may return a value
-   * greater than {@code numBits+1}.
-   */
-  public static FixedBitSet ensureCapacity(FixedBitSet bits, int numBits) {
-    if (numBits < bits.numBits) {
+  /// Ensure the given `bits` can store a value at `desiredBit` index. If the current [#length()] is
+  /// sufficient, `bits` is simply returned. Otherwise, a new, larger bitset is allocated, with
+  /// contents of `bits` copied.
+  ///
+  /// NOTE: the returned bitset reuses the underlying `long[]` of `bits` if possible, and its
+  /// [#length()] may be greater than `desiredBit + 1`.
+  ///
+  /// @see #ensureCapacityAndClear(FixedBitSet, int)
+  public static FixedBitSet ensureCapacity(FixedBitSet bits, int desiredBit) {
+    return ensureCapacityInternal(bits, desiredBit, true);
+  }
+
+  /// Clear the given `bits` and ensure it can store a value at `desiredBit` index. If the current
+  /// [#length()] is sufficient, `bits` is simply cleared and returned. Otherwise, a new, larger
+  /// bitset is allocated.
+  ///
+  /// NOTE: the returned bitset reuses the underlying `long[]` of `bits` if possible (it is cleared
+  /// in that case), and its [#length()] may be greater than `desiredBit + 1`.
+  ///
+  /// @see #ensureCapacity(FixedBitSet, int)
+  public static FixedBitSet ensureCapacityAndClear(FixedBitSet bits, int desiredBit) {
+    return ensureCapacityInternal(bits, desiredBit, false);
+  }
+
+  private static FixedBitSet ensureCapacityInternal(
+      FixedBitSet bits, int desiredBit, boolean preserveData) {
+    if (desiredBit < bits.numBits) {
+      if (!preserveData) {
+        bits.clear();
+      }
       return bits;
     } else {
       // Depends on the ghost bits being clear!
       // (Otherwise, they may become visible in the new instance)
-      int numWords = bits2words(numBits);
+      int numWords = bits2words(desiredBit);
       long[] arr = bits.getBits();
       if (numWords >= arr.length) {
-        arr = ArrayUtil.grow(arr, numWords + 1);
+        if (preserveData) {
+          arr = ArrayUtil.grow(arr, numWords + 1);
+        } else {
+          arr = new long[ArrayUtil.oversize(numWords + 1, Long.BYTES)];
+        }
+      } else if (!preserveData) {
+        // The backing array already has spare words and is reused as-is: wipe the old contents,
+        // otherwise the "cleared" bitset returned below would still expose the previous bits.
+        bits.clear();
       }
       return new FixedBitSet(arr, arr.length << 6);
     }
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
index 1876fe6c4cf5..d739915ca078 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
@@ -369,9 +369,10 @@ void searchLevel(
   private void prepareScratchState(int capacity, int bulkScoreSize) {
     candidates.clear();
     if (visited.length() < capacity) {
-      visited = FixedBitSet.ensureCapacity((FixedBitSet) visited, capacity);
+      visited = FixedBitSet.ensureCapacityAndClear((FixedBitSet) visited, capacity);
+    } else {
+      visited.clear();
     }
-    visited.clear();
     if (bulkNodes == null || bulkNodes.length < bulkScoreSize) {
       bulkNodes = new int[bulkScoreSize];
       bulkScores = new float[bulkScoreSize];