Skip to content

Commit 5320141

Browse files
committed
Add MathUtil#unsignedMin to simplify dividing the doc ID space into windows. (#14750)
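We often need to divide the doc ID space into sub-windows, but computing the end of a window as `windowStart + windowSize` is prone to integer overflow whenever the size of the window is greater than `Integer.MAX_VALUE - windowStart`. This adds `MathUtil#unsignedMin` to make these overflows a bit easier to manage: because both operands are non-negative, an overflowed sum wraps around to a negative `int`, which compares as larger than any valid doc ID when interpreted as unsigned, so taking the unsigned min against the upper bound yields the correctly clamped window end without widening to `long`.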
1 parent 92de7a2 commit 5320141

File tree

9 files changed

+35
-11
lines changed

9 files changed

+35
-11
lines changed

lucene/core/src/java/org/apache/lucene/search/BitSetDocIdStream.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import java.io.IOException;
2020
import org.apache.lucene.util.FixedBitSet;
21+
import org.apache.lucene.util.MathUtil;
2122

2223
final class BitSetDocIdStream extends DocIdStream {
2324

@@ -29,7 +30,7 @@ final class BitSetDocIdStream extends DocIdStream {
2930
this.bitSet = bitSet;
3031
this.offset = offset;
3132
upTo = offset;
32-
max = (int) Math.min(Integer.MAX_VALUE, (long) offset + bitSet.length());
33+
max = MathUtil.unsignedMin(Integer.MAX_VALUE, offset + bitSet.length());
3334
}
3435

3536
@Override

lucene/core/src/java/org/apache/lucene/search/DenseConjunctionBulkScorer.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.util.List;
2323
import org.apache.lucene.util.Bits;
2424
import org.apache.lucene.util.FixedBitSet;
25+
import org.apache.lucene.util.MathUtil;
2526

2627
/**
2728
* BulkScorer implementation of {@link ConjunctionScorer} that is specialized for dense clauses.
@@ -176,7 +177,7 @@ private int scoreWindow(
176177
// data, which helps evaluate fewer clauses per window - without allowing windows to become too
177178
// small thanks to the WINDOW_SIZE/2 threshold.
178179
int minDocIDRunEnd = max;
179-
final int minRunEndThreshold = (int) Math.min((long) min + WINDOW_SIZE / 2, max);
180+
final int minRunEndThreshold = MathUtil.unsignedMin(min + WINDOW_SIZE / 2, max);
180181
for (DisiWrapper w : iterators) {
181182
int docIdRunEnd = w.docIDRunEnd();
182183
if (w.docID() > min || docIdRunEnd < minRunEndThreshold) {
@@ -195,7 +196,7 @@ private int scoreWindow(
195196
return minDocIDRunEnd;
196197
}
197198

198-
int bitsetWindowMax = (int) Math.min(minDocIDRunEnd, (long) WINDOW_SIZE + min);
199+
int bitsetWindowMax = MathUtil.unsignedMin(minDocIDRunEnd, WINDOW_SIZE + min);
199200

200201
if (windowTwoPhases.isEmpty()) {
201202
scoreWindowUsingBitSet(collector, acceptDocs, windowApproximations, min, bitsetWindowMax);

lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxBulkScorer.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.util.Objects;
2323
import org.apache.lucene.util.Bits;
2424
import org.apache.lucene.util.FixedBitSet;
25+
import org.apache.lucene.util.MathUtil;
2526
import org.apache.lucene.util.PriorityQueue;
2627

2728
/** Bulk scorer for {@link DisjunctionMaxQuery} when the tie-break multiplier is zero. */
@@ -67,7 +68,7 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr
6768

6869
while (top.next < max) {
6970
final int windowMin = Math.max(top.next, min);
70-
final int windowMax = (int) Math.min(max, (long) windowMin + WINDOW_SIZE);
71+
final int windowMax = MathUtil.unsignedMin(max, windowMin + WINDOW_SIZE);
7172

7273
// First compute matches / scores in the window
7374
do {

lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ private void scoreInnerWindowWithFilter(
181181
// Only score an inner window, after that we'll check if the min competitive score has increased
182182
// enough for a more favorable partitioning to be used.
183183
int innerWindowMin = top.doc;
184-
int innerWindowMax = (int) Math.min(max, (long) innerWindowMin + INNER_WINDOW_SIZE);
184+
int innerWindowMax = MathUtil.unsignedMin(max, innerWindowMin + INNER_WINDOW_SIZE);
185185

186186
docAndScoreAccBuffer.size = 0;
187187
while (top.doc < innerWindowMax) {
@@ -277,7 +277,7 @@ private void scoreInnerWindowMultipleEssentialClauses(
277277
DisiWrapper top = essentialQueue.top();
278278

279279
int innerWindowMin = top.doc;
280-
int innerWindowMax = (int) Math.min(max, (long) innerWindowMin + INNER_WINDOW_SIZE);
280+
int innerWindowMax = MathUtil.unsignedMin(max, innerWindowMin + INNER_WINDOW_SIZE);
281281
int innerWindowSize = innerWindowMax - innerWindowMin;
282282

283283
// Collect matches of essential clauses into a bitset
@@ -324,7 +324,7 @@ private int computeOuterWindowMax(int windowMin) throws IOException {
324324
final DisiWrapper scorer = allScorers[i];
325325
if (filter == null || scorer.cost >= filter.cost) {
326326
final int upTo = scorer.scorer.advanceShallow(Math.max(scorer.doc, windowMin));
327-
windowMax = (int) Math.min(windowMax, upTo + 1L); // upTo is inclusive
327+
windowMax = MathUtil.unsignedMin(windowMax, upTo + 1); // upTo is inclusive
328328
}
329329
}
330330

@@ -341,7 +341,7 @@ private int computeOuterWindowMax(int windowMin) throws IOException {
341341
minWindowSize = 1;
342342
}
343343

344-
int minWindowMax = (int) Math.min(Integer.MAX_VALUE, (long) windowMin + minWindowSize);
344+
int minWindowMax = MathUtil.unsignedMin(Integer.MAX_VALUE, windowMin + minWindowSize);
345345
windowMax = Math.max(windowMax, minWindowMax);
346346
}
347347

lucene/core/src/java/org/apache/lucene/search/TimeLimitingBulkScorer.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.util.Objects;
2222
import org.apache.lucene.index.QueryTimeout;
2323
import org.apache.lucene.util.Bits;
24+
import org.apache.lucene.util.MathUtil;
2425

2526
/**
2627
* The {@link TimeLimitingBulkScorer} is used to timeout search requests that take longer than the
@@ -69,7 +70,7 @@ public TimeLimitingBulkScorer(BulkScorer bulkScorer, QueryTimeout queryTimeout)
6970
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
7071
int interval = INTERVAL;
7172
while (min < max) {
72-
final int newMax = (int) Math.min((long) min + interval, max);
73+
final int newMax = MathUtil.unsignedMin(min + interval, max);
7374
final int newInterval =
7475
interval + (interval >> 1); // increase the interval by 50% on each iteration
7576
// overflow check

lucene/core/src/java/org/apache/lucene/util/BitSetIterator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOExcept
100100
// The destination bit set may be shorter than this bit set. This is only legal if all bits
101101
// beyond offset + bitSet.length() are clear. If not, the below call to `super.intoBitSet`
102102
// will throw an exception.
103-
actualUpto = (int) Math.min(actualUpto, offset + (long) bitSet.length());
103+
actualUpto = MathUtil.unsignedMin(actualUpto, offset + bitSet.length());
104104
FixedBitSet.orRange(fixedBits, doc, bitSet, doc - offset, actualUpto - doc);
105105
advance(actualUpto); // set the current doc
106106
}

lucene/core/src/java/org/apache/lucene/util/DocBaseBitSetIterator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOExcept
9292
// The destination bit set may be shorter than this bit set. This is only legal if all bits
9393
// beyond offset + bitSet.length() are clear. If not, the below call to `super.intoBitSet` will
9494
// throw an exception.
95-
actualUpto = (int) Math.min(actualUpto, offset + (long) bitSet.length());
95+
actualUpto = MathUtil.unsignedMin(actualUpto, offset + bitSet.length());
9696
if (actualUpto > doc) {
9797
FixedBitSet.orRange(bits, doc - docBase, bitSet, doc - offset, actualUpto - doc);
9898
advance(actualUpto); // set the current doc

lucene/core/src/java/org/apache/lucene/util/MathUtil.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,4 +192,9 @@ public static double sumUpperBound(double sum, int numValues) {
192192
double b = MathUtil.sumRelativeErrorBound(numValues);
193193
return (1.0 + 2 * b) * sum;
194194
}
195+
196+
/** Return the min of the two ints, compared as unsigned per {@link Integer#compareUnsigned}. */
197+
public static int unsignedMin(int a, int b) {
198+
return Integer.compareUnsigned(a, b) < 0 ? a : b;
199+
}
195200
}

lucene/core/src/test/org/apache/lucene/util/TestMathUtil.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,4 +181,19 @@ public void testAtanhMethod() {
181181
assertEquals(0.5493061443340549, MathUtil.atanh(0.5), epsilon);
182182
assertEquals(Double.POSITIVE_INFINITY, MathUtil.atanh(1), 0);
183183
}
184+
185+
public void testUnsignedMin() {
186+
assertEquals(0, MathUtil.unsignedMin(0, 0));
187+
assertEquals(0, MathUtil.unsignedMin(0, 3));
188+
assertEquals(0, MathUtil.unsignedMin(3, 0));
189+
assertEquals(0, MathUtil.unsignedMin(0, Integer.MAX_VALUE));
190+
assertEquals(0, MathUtil.unsignedMin(Integer.MAX_VALUE, 0));
191+
assertEquals(Integer.MAX_VALUE, MathUtil.unsignedMin(Integer.MAX_VALUE, Integer.MAX_VALUE + 1));
192+
assertEquals(Integer.MAX_VALUE, MathUtil.unsignedMin(Integer.MAX_VALUE + 1, Integer.MAX_VALUE));
193+
assertEquals(Integer.MAX_VALUE, MathUtil.unsignedMin(Integer.MAX_VALUE, Integer.MIN_VALUE));
194+
assertEquals(Integer.MAX_VALUE, MathUtil.unsignedMin(Integer.MIN_VALUE, Integer.MAX_VALUE));
195+
assertEquals(Integer.MIN_VALUE, MathUtil.unsignedMin(Integer.MIN_VALUE, -1));
196+
assertEquals(Integer.MIN_VALUE, MathUtil.unsignedMin(-1, Integer.MIN_VALUE));
197+
assertEquals(Integer.MIN_VALUE, MathUtil.unsignedMin(Integer.MIN_VALUE, Integer.MIN_VALUE));
198+
}
184199
}

0 commit comments

Comments
 (0)