Skip to content

Commit 7271b5f

Browse files
authored
Updating Dense#intoBitSet to properly set upTo if it exceeds bitset size (#14922)
1 parent 93a9e50 commit 7271b5f

File tree

3 files changed

+51
-6
lines changed

3 files changed

+51
-6
lines changed

lucene/CHANGES.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,9 @@ Bug Fixes
234234

235235
* GITHUB#14838: Make it possible to extend Patience/Seeded knn queries (Tommaso Teofili)
236236

237+
* GITHUB#14922: Fix for IndexedDISI.Dense#intoBitSetWithinBlock to take into account the provided bitset size
238+
and avoid throwing IndexOutOfBoundsException (Panagiotis Bailis)
239+
237240
Build
238241
---------------------
239242
* Upgrade forbiddenapis to version 3.9. (Uwe Schindler)

lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.apache.lucene.util.ArrayUtil;
2828
import org.apache.lucene.util.BitSetIterator;
2929
import org.apache.lucene.util.FixedBitSet;
30+
import org.apache.lucene.util.MathUtil;
3031
import org.apache.lucene.util.RoaringDocIdSet;
3132

3233
/**
@@ -473,7 +474,7 @@ public int advance(int target) throws IOException {
473474

474475
@Override
475476
public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException {
476-
assert doc >= offset;
477+
assert doc >= offset : "offset=" + offset + " doc=" + doc;
477478
while (doc < upTo && method.intoBitSetWithinBlock(this, upTo, bitSet, offset) == false) {
478479
readBlockHeader();
479480
boolean found = method.advanceWithinBlock(this, block);
@@ -719,10 +720,10 @@ boolean intoBitSetWithinBlock(IndexedDISI disi, int upTo, FixedBitSet bitSet, in
719720
if (disi.bitSet == null) {
720721
disi.bitSet = new FixedBitSet(BLOCK_SIZE);
721722
}
722-
723-
int sourceFrom = disi.doc & 0xFFFF;
724-
int sourceTo = Math.min(upTo - disi.block, BLOCK_SIZE);
725723
int destFrom = disi.doc - offset;
724+
int destTo = MathUtil.unsignedMin(upTo, offset + bitSet.length());
725+
int sourceFrom = disi.doc & 0xFFFF;
726+
int sourceTo = Math.min(destTo - disi.block, BLOCK_SIZE);
726727

727728
long fp = disi.slice.getFilePointer();
728729
disi.slice.seek(fp - Long.BYTES); // seek back a long to include current word (disi.word).
@@ -731,13 +732,24 @@ boolean intoBitSetWithinBlock(IndexedDISI disi, int upTo, FixedBitSet bitSet, in
731732
FixedBitSet.orRange(disi.bitSet, sourceFrom, bitSet, destFrom, sourceTo - sourceFrom);
732733

733734
int blockEnd = disi.block | 0xFFFF;
734-
if (upTo > blockEnd) {
735+
if (destTo > blockEnd) {
735736
disi.slice.seek(disi.blockEnd);
736737
disi.index += disi.bitSet.cardinality(sourceFrom, sourceTo);
737738
return false;
738739
} else {
739740
disi.slice.seek(fp);
740-
return advanceWithinBlock(disi, upTo);
741+
boolean found = advanceWithinBlock(disi, destTo);
742+
if (found && disi.doc < upTo) {
743+
throw new IllegalStateException(
744+
"There are bits set in the source bitset that are not accounted for."
745+
+ " doc="
746+
+ disi.doc
747+
+ " upTo="
748+
+ upTo
749+
+ " disi.block="
750+
+ disi.block);
751+
}
752+
return found;
741753
}
742754
}
743755

lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestIndexedDISI.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,36 @@ public void testDenseMultiBlock() throws IOException {
388388
}
389389
}
390390

391+
/**
 * Regression test: round-trips a bitset with fewer bits than one IndexedDISI block through
 * {@code IndexedDISI.writeBitSet} and then ORs the resulting {@code IndexedDISI} into a
 * {@code FixedBitSet} of exactly {@code maxDoc} bits.
 *
 * <p>The test makes no assertion on the bits read back; it passes as long as the write reports
 * zero jump-table entries and {@code disiSet.or(disi)} completes without throwing.
 */
public void testDenseBitSizeLessThanBlockSize() throws IOException {
392+
final byte denseRankPower = (byte) (random().nextInt(7) + 7);
393+
try (Directory dir = newDirectory()) {
394+
// initialize a maxDoc that is less than IndexedDISI.BLOCK_SIZE
// NOTE(review): presumably nextInt(origin, bound) yields a value in [8192, 65536) - confirm
395+
int maxDoc = random().nextInt(4096 * 2, 65536);
396+
FixedBitSet set = new FixedBitSet(maxDoc);
397+
for (int i = 0; i < maxDoc; i += 2) { // Set every other to ensure dense
398+
set.set(i);
399+
}
400+
int jumpTableEntryCount; // this should always be 0 given that maxDoc < BLOCK_SIZE
401+
long length;
// Write phase: serialize the bitset to the file "foo" and record how many bytes were written.
402+
try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
403+
jumpTableEntryCount =
404+
IndexedDISI.writeBitSet(
405+
new BitSetIterator(set, set.cardinality()), out, denseRankPower);
406+
length = out.getFilePointer();
407+
assertTrue(
408+
"jumpTableEntryCount should be 0 for dense bitsets with size < BLOCK_SIZE",
409+
0 == jumpTableEntryCount);
410+
}
// Read phase: reopen the same file and load it into a destination bitset that is sized to
// maxDoc, i.e. smaller than a full block.
411+
try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
412+
IndexedDISI disi =
413+
new IndexedDISI(in, 0L, length, jumpTableEntryCount, denseRankPower, set.cardinality());
414+
FixedBitSet disiSet = new FixedBitSet(maxDoc);
415+
// This would throw IOOB if bitset size is not handled correctly as per #14882
416+
disiSet.or(disi);
417+
}
418+
}
419+
}
420+
391421
public void testIllegalDenseRankPower() throws IOException {
392422

393423
// Legal values

0 commit comments

Comments
 (0)