@@ -67,4 +67,10 @@ public Matches matches(LeafReaderContext context, int doc) throws IOException {
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
return in.scorerSupplier(context);
}

@Override
public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition partition)
throws IOException {
return in.scorerSupplier(partition);
}
}
11 changes: 10 additions & 1 deletion lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -828,7 +828,16 @@ protected void searchLeaf(
// continue with the following leaf
return;
}
ScorerSupplier scorerSupplier = weight.scorerSupplier(ctx);

// Create a partition describing the doc ID range to search, to pass to the weight
final LeafReaderContextPartition partition;
if (minDocId == 0 && maxDocId == DocIdSetIterator.NO_MORE_DOCS) {
partition = LeafReaderContextPartition.createForEntireSegment(ctx);
} else {
partition = LeafReaderContextPartition.createFromAndTo(ctx, minDocId, maxDocId);
}

ScorerSupplier scorerSupplier = weight.scorerSupplier(partition);
if (scorerSupplier != null) {
scorerSupplier.setTopLevelScoringClause();
BulkScorer scorer = scorerSupplier.bulkScorer();
18 changes: 15 additions & 3 deletions lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
@@ -249,8 +249,9 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
}

@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition partition)
throws IOException {
LeafReader reader = partition.ctx.reader();

PointValues values = reader.getPointValues(field);
if (checkValidPointValues(values) == false) {
@@ -298,7 +299,11 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException
} else {
return new ConstantScoreScorerSupplier(score(), scoreMode, reader.maxDoc()) {

final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values);
// Create partition-aware DocIdSetBuilder that filters docs and uses partition-sized
// threshold
final DocIdSetBuilder result =
new DocIdSetBuilder(
reader.maxDoc(), values, partition.minDocId, partition.maxDocId);
final IntersectVisitor visitor = getIntersectVisitor(result);
long cost = -1;

@@ -336,6 +341,13 @@ public long cost() {
}
}

@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
// Delegate to the partition-aware variant with a partition covering the entire segment
return scorerSupplier(
IndexSearcher.LeafReaderContextPartition.createForEntireSegment(context));
}

@Override
public int count(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
25 changes: 25 additions & 0 deletions lucene/core/src/java/org/apache/lucene/search/Weight.java
@@ -21,6 +21,7 @@
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.IndexSearcher.LeafReaderContextPartition;
import org.apache.lucene.util.Bits;

/**
@@ -149,6 +150,30 @@ public final Scorer scorer(LeafReaderContext context) throws IOException {
*/
public abstract ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException;

/**
* Returns a {@link ScorerSupplier}, which can then be used to get a {@link Scorer} for a
* partition of a leaf reader context.
*
* <p>This method allows queries to optimize for intra-segment concurrency by knowing the specific
* doc ID range being searched within the segment. The default implementation delegates to {@link
* #scorerSupplier(LeafReaderContext)} ignoring the partition bounds. Queries that can benefit
* from partition awareness (e.g., by creating smaller data structures scoped to the partition)
* should override this method.
*
* <p>A scorer supplier for the same {@link LeafReaderContext} instance may be requested multiple
* times as part of a single search call, potentially from different threads searching different
* doc ID ranges concurrently.
*
* @param partition the leaf reader context partition containing the context and doc ID range
* @return a {@link ScorerSupplier} providing the scorer, or {@code null} if no documents match
* @throws IOException if an IOException occurs
* @see LeafReaderContextPartition
* @since 10.1
*/
public ScorerSupplier scorerSupplier(LeafReaderContextPartition partition) throws IOException {
return scorerSupplier(partition.ctx);
}

/**
* Helper method that delegates to {@link #scorerSupplier(LeafReaderContext)}. It is implemented
* as
201 changes: 194 additions & 7 deletions lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
@@ -41,7 +41,11 @@ public final class DocIdSetBuilder {
*
* @see DocIdSetBuilder#grow
*/
public sealed interface BulkAdder permits FixedBitSetAdder, BufferAdder {
public sealed interface BulkAdder
permits FixedBitSetAdder,
BufferAdder,
PartitionAwareFixedBitSetAdder,
PartitionAwareBufferAdder {
Comment on lines +44 to +48

@benwtrent (Member) — Nov 3, 2025

This is now megamorphic :(

Contributor reply:

That's a good point. We should run the benchmarks to quantify the impact of the virtual calls and megamorphism. Assuming the impact is significant, I wonder whether we could use PartitionAwareFixedBitSetAdder directly instead of FixedBitSetAdder?
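
A minimal sketch of that idea (editor's illustration, not part of this PR): treat a full segment as the partition [0, maxDoc) with offset 0, so DocIdSetBuilder always constructs the partition-aware adders and the BulkAdder call sites see fewer concrete types. The helper method names below are hypothetical.

```java
// Hypothetical sketch only. Inside DocIdSetBuilder, a full segment can be modelled
// as the partition [0, maxDoc) with offset 0, so the partition-aware adders cover
// both cases and FixedBitSetAdder/BufferAdder would no longer be needed.
private BulkAdder newBitSetAdder(FixedBitSet bitSet) {
  // For a full segment, minDocId == 0 and maxDocId == maxDoc, so the range check in
  // PartitionAwareFixedBitSetAdder.add() always passes and doc IDs are stored unshifted.
  return new PartitionAwareFixedBitSetAdder(bitSet, minDocId, maxDocId, minDocId);
}

private BulkAdder newBufferAdder(Buffer buffer) {
  return new PartitionAwareBufferAdder(buffer, minDocId, maxDocId);
}
```

The trade-off is a per-doc bounds check on the full-segment path, which is what the benchmark would need to weigh against the cost of the extra virtual-call targets.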

void add(int doc);

void add(IntsRef docs);
@@ -82,6 +86,59 @@ public void add(IntsRef docs, int docLowerBoundInclusive) {
}
}

/**
* Partition-aware FixedBitSetAdder that filters docs to only include those within the specified
* range. Stores docs using partition-relative indices (doc - offset) to save memory.
*
* @param bitSet the partition-sized bitset to store relative doc indices
* @param minDocId minimum doc ID (inclusive) to accept
* @param maxDocId maximum doc ID (exclusive) to accept
* @param offset the value to subtract from absolute doc IDs (typically minDocId)
*/
private record PartitionAwareFixedBitSetAdder(
FixedBitSet bitSet, int minDocId, int maxDocId, int offset) implements BulkAdder {

@Override
public void add(int doc) {
if (doc >= minDocId && doc < maxDocId) {
bitSet.set(doc - offset);
}
}

@Override
public void add(IntsRef docs) {
for (int i = docs.offset, to = docs.offset + docs.length; i < to; i++) {
int doc = docs.ints[i];
if (doc >= minDocId && doc < maxDocId) {
bitSet.set(doc - offset);
}
}
}

@Override
public void add(DocIdSetIterator iterator) throws IOException {
// Advance iterator to minDocId first
int doc = iterator.nextDoc();
if (doc < minDocId) {
doc = iterator.advance(minDocId);
}
// Use optimized intoBitSet with partition boundaries and offset
if (doc < maxDocId) {
iterator.intoBitSet(maxDocId, bitSet, offset);
}
}

@Override
public void add(IntsRef docs, int docLowerBoundInclusive) {
for (int i = docs.offset, to = docs.offset + docs.length; i < to; i++) {
int doc = docs.ints[i];
if (doc >= Math.max(docLowerBoundInclusive, minDocId) && doc < maxDocId) {
bitSet.set(doc - offset);
}
}
}
}

private static class Buffer {
int[] array;
int length;
@@ -131,12 +188,62 @@ public void add(IntsRef docs, int docLowerBoundInclusive) {
}
}

/**
* Partition-aware BufferAdder that filters docs to only include those within the specified range.
*/
private record PartitionAwareBufferAdder(Buffer buffer, int minDocId, int maxDocId)
implements BulkAdder {

@Override
public void add(int doc) {
if (doc >= minDocId && doc < maxDocId) {
buffer.array[buffer.length++] = doc;
}
}

@Override
public void add(IntsRef docs) {
int index = buffer.length;
for (int i = docs.offset, to = docs.offset + docs.length; i < to; i++) {
int doc = docs.ints[i];
if (doc >= minDocId && doc < maxDocId) {
buffer.array[index++] = doc;
}
}
buffer.length = index;
}

@Override
public void add(DocIdSetIterator iterator) throws IOException {
int docID;
while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
add(docID);
}
}

@Override
public void add(IntsRef docs, int docLowerBoundInclusive) {
int index = buffer.length;
for (int i = docs.offset, to = docs.offset + docs.length; i < to; i++) {
int doc = docs.ints[i];
if (doc >= Math.max(docLowerBoundInclusive, minDocId) && doc < maxDocId) {
buffer.array[index++] = doc;
}
}
buffer.length = index;
}
}

private final int maxDoc;
private final int threshold;
// pkg-private for testing
final boolean multivalued;
final double numValuesPerDoc;

// Partition filtering support - filters docs to only include those within [minDocId, maxDocId)
private final int minDocId; // inclusive
private final int maxDocId; // exclusive

private List<Buffer> buffers = new ArrayList<>();
private int totalAllocated; // accumulated size of the allocated buffers

@@ -166,8 +273,57 @@ public DocIdSetBuilder(int maxDoc, PointValues values) throws IOException {
this(maxDoc, values.getDocCount(), values.size());
}

/**
* Create a partition-aware {@link DocIdSetBuilder} instance that only accepts doc IDs within the
* specified range. This is useful for intra-segment concurrency where each partition only needs
* to collect docs within its assigned range.
*
* @param maxDoc the maximum doc ID in the segment
* @param minDocId the minimum doc ID (inclusive) to accept
* @param maxDocId the maximum doc ID (exclusive) to accept
* @param docCount estimated document count
* @param valueCount estimated value count
*/
public DocIdSetBuilder(int maxDoc, int minDocId, int maxDocId, int docCount, long valueCount) {
this(maxDoc, docCount, valueCount, minDocId, maxDocId);
}

/**
* Create a partition-aware {@link DocIdSetBuilder} for {@link PointValues} that only accepts doc
* IDs within the specified range.
*
* @param maxDoc the maximum doc ID in the segment
* @param values the point values
* @param minDocId the minimum doc ID (inclusive) to accept
* @param maxDocId the maximum doc ID (exclusive) to accept
*/
public DocIdSetBuilder(int maxDoc, PointValues values, int minDocId, int maxDocId)
throws IOException {
this(maxDoc, values.getDocCount(), values.size(), minDocId, maxDocId);
}

/**
* Create a partition-aware {@link DocIdSetBuilder} for {@link Terms} that only accepts doc IDs
* within the specified range.
*
* @param maxDoc the maximum doc ID in the segment
* @param terms the terms
* @param minDocId the minimum doc ID (inclusive) to accept
* @param maxDocId the maximum doc ID (exclusive) to accept
*/
public DocIdSetBuilder(int maxDoc, Terms terms, int minDocId, int maxDocId) throws IOException {
this(maxDoc, terms.getDocCount(), terms.getSumDocFreq(), minDocId, maxDocId);
}

DocIdSetBuilder(int maxDoc, int docCount, long valueCount) {
this(maxDoc, docCount, valueCount, 0, maxDoc);
}

private DocIdSetBuilder(int maxDoc, int docCount, long valueCount, int minDocId, int maxDocId) {
this.maxDoc = maxDoc;
this.minDocId = minDocId;
this.maxDocId = maxDocId;

this.multivalued = docCount < 0 || docCount != valueCount;
if (docCount <= 0 || valueCount < 0) {
// assume one value per doc, this means the cost will be overestimated
@@ -184,7 +340,12 @@ public DocIdSetBuilder(int maxDoc, PointValues values) throws IOException {
// maxDoc >>> 7 is a good value if you want to save memory, lower values
// such as maxDoc >>> 11 should provide faster building but at the expense
// of using a full bitset even for quite sparse data
this.threshold = maxDoc >>> 7;
//
// When filtering to a partition (minDocId > 0 or maxDocId < maxDoc), compute the threshold
// from the partition size so that it scales with the number of docs the partition can collect
boolean isPartition = (minDocId > 0 || maxDocId < maxDoc);
int effectiveMaxDoc = isPartition ? (maxDocId - minDocId) : maxDoc;
this.threshold = effectiveMaxDoc >>> 7;

this.bitSet = null;
}
@@ -267,7 +428,12 @@ private int additionalCapacity(int numDocs) {
private Buffer addBuffer(int len) {
Buffer buffer = new Buffer(len);
buffers.add(buffer);
adder = new BufferAdder(buffer);
// Use partition-aware adder if filtering to a specific doc ID range
if (minDocId > 0 || maxDocId < maxDoc) {
adder = new PartitionAwareBufferAdder(buffer, minDocId, maxDocId);
} else {
adder = new BufferAdder(buffer);
}
totalAllocated += buffer.array.length;
return buffer;
}
@@ -279,20 +445,34 @@ private void growBuffer(Buffer buffer, int additionalCapacity) {

private void upgradeToBitSet() {
assert bitSet == null;
FixedBitSet bitSet = new FixedBitSet(maxDoc);

// For partitions, create a smaller bitset sized to the partition range only
// This saves memory by not allocating bits outside [minDocId, maxDocId)
boolean isPartition = (minDocId > 0 || maxDocId < maxDoc);
int bitSetSize = isPartition ? (maxDocId - minDocId) : maxDoc;

FixedBitSet bitSet = new FixedBitSet(bitSetSize);
long counter = 0;
for (Buffer buffer : buffers) {
int[] array = buffer.array;
int length = buffer.length;
counter += length;
for (int i = 0; i < length; ++i) {
bitSet.set(array[i]);
// For partitions, convert absolute doc ID to partition-relative index
int docId = array[i];
int bitIndex = isPartition ? (docId - minDocId) : docId;
bitSet.set(bitIndex);
}
}
this.bitSet = bitSet;
this.counter = counter;
this.buffers = null;
this.adder = new FixedBitSetAdder(bitSet);
// Use partition-aware adder if filtering to a specific doc ID range
if (isPartition) {
this.adder = new PartitionAwareFixedBitSetAdder(bitSet, minDocId, maxDocId, minDocId);
} else {
this.adder = new FixedBitSetAdder(bitSet);
}
}

/** Build a {@link DocIdSet} from the accumulated doc IDs. */
@@ -301,7 +481,14 @@ public DocIdSet build() {
if (bitSet != null) {
assert counter >= 0;
final long cost = Math.round(counter / numValuesPerDoc);
return new BitDocIdSet(bitSet, cost);

// For partition-relative bitsets, wrap with offset to return absolute doc IDs
boolean isPartition = (minDocId > 0 || maxDocId < maxDoc);
if (isPartition) {
return new OffsetBitDocIdSet(bitSet, cost, minDocId);
} else {
return new BitDocIdSet(bitSet, cost);
}
} else {
Buffer concatenated = concat(buffers);
LSBRadixSorter sorter = new LSBRadixSorter();