Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.common.lucene.search;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

import java.util.Objects;

/**
* A {@link DocIdSetIterator} over set bits of a {@link Bits} instance.
*/
public final class BitsIterator extends DocIdSetIterator {

private static final int WINDOW_SIZE = 1024;

private final Bits bits;

private int doc = -1;
private final FixedBitSet bitSet;
private int from = 0;
private int to = 0;

/**
 * Creates an iterator over the set bits of the given {@link Bits}.
 *
 * @param bits the bits to iterate over, must not be {@code null}
 * @throws NullPointerException if {@code bits} is {@code null}
 */
public BitsIterator(Bits bits) {
    this.bits = Objects.requireNonNull(bits);
    // The window buffer holds WINDOW_SIZE (1024) bits, which is nothing more than a long[16] internally, so it is cheap to allocate.
    this.bitSet = new FixedBitSet(WINDOW_SIZE);
}

/**
 * Returns the current position of this iterator: {@code -1} until {@link #nextDoc()} or {@link #advance(int)} has been
 * called, and afterwards whatever the last such call returned.
 */
@Override
public int docID() {
    return this.doc;
}

/**
 * Moves to the next set bit after the current position by delegating to {@link #advance(int)}.
 *
 * @return the index of the next set bit, or {@link #NO_MORE_DOCS} if there is none
 */
@Override
public int nextDoc() {
    final int target = doc + 1;
    return advance(target);
}

/**
 * Moves to the first set bit whose index is greater than or equal to {@code target}.
 * <p>
 * Bits are inspected one buffered window at a time: whenever {@code target} falls past the end of the current window,
 * a new window starting at {@code target} is loaded before the search continues.
 *
 * @param target the smallest index that may be returned
 * @return the index of the matching set bit, or {@link #NO_MORE_DOCS} if no bit is set at or after {@code target}
 */
@Override
public int advance(int target) {
    while (true) {
        if (target >= to) {
            // The buffered window does not cover target: either we are past the last bit, or a new window is needed.
            if (target >= bits.length()) {
                doc = NO_MORE_DOCS;
                return doc;
            }
            refill(target);
        }

        final int offset = bitSet.nextSetBit(target - from);
        if (offset == NO_MORE_DOCS) {
            // Nothing left in this window, resume the search right after its end.
            target = to;
            continue;
        }
        doc = from + offset;
        return doc;
    }
}

private void refill(int target) {
assert target >= to;
from = target;
bitSet.set(0, WINDOW_SIZE);
if (bits.length() - from < WINDOW_SIZE) {
to = bits.length();
bitSet.clear(to - from, WINDOW_SIZE);
} else {
to = from + WINDOW_SIZE;
}
Copy link
Contributor

@tvernum tvernum Sep 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason we use a mix of WINDOW_SIZE and bitSet.length() ?

They should be the same, and it looks like the code relies on them being the same, but perhaps you had a deeper reason for mixing them.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No reason! Let me use WINDOW_SIZE everywhere.

bits.applyMask(bitSet, from);
}

/**
 * Returns the cost estimate of this iterator, which is the length of the wrapped {@link Bits}.
 */
@Override
public long cost() {
    // No better estimate is available than the total number of bits.
    return this.bits.length();
}
}
127 changes: 0 additions & 127 deletions server/src/main/java/org/elasticsearch/lucene/util/CombinedBitSet.java

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.lucene.util;

import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

/**
 * A {@link Bits} implementation that exposes the logical AND of two {@link Bits} instances of equal length as a single
 * merged view: a bit is reported as set only when it is set in both underlying instances.
 */
public final class CombinedBits implements Bits {
    private final Bits first;
    private final Bits second;
    private final int length;

    /**
     * Combines the two given instances.
     *
     * @param first  the first set of bits
     * @param second the second set of bits, which must have the same length as {@code first}
     * @throws IllegalArgumentException if the two instances report different lengths
     */
    public CombinedBits(Bits first, Bits second) {
        final int firstLength = first.length();
        final int secondLength = second.length();
        if (firstLength != secondLength) {
            throw new IllegalArgumentException("Provided bits have different lengths: " + firstLength + " != " + secondLength);
        }
        this.first = first;
        this.second = second;
        this.length = firstLength;
    }

    /**
     * Returns {@code true} only if the bit at {@code index} is set in both underlying instances. The second instance is
     * not consulted when the first one already reports the bit as unset.
     */
    @Override
    public boolean get(int index) {
        if (first.get(index) == false) {
            return false;
        }
        return second.get(index);
    }

    /**
     * Returns the common length of the two underlying instances.
     */
    @Override
    public int length() {
        return this.length;
    }

    /**
     * Delegates to both underlying instances in turn, so that the given bit set ends up masked by each of them.
     */
    @Override
    public void applyMask(FixedBitSet bitSet, int offset) {
        this.first.applyMask(bitSet, offset);
        this.second.applyMask(bitSet, offset);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,9 @@
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.SparseFixedBitSet;
import org.elasticsearch.common.lucene.search.BitsIterator;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.lucene.util.CombinedBitSet;
import org.elasticsearch.search.dfs.AggregatedDfs;
import org.elasticsearch.search.profile.Timer;
import org.elasticsearch.search.profile.query.ProfileWeight;
Expand Down Expand Up @@ -454,8 +451,11 @@ protected void searchLeaf(LeafReaderContext ctx, int minDocId, int maxDocId, Wei
return;
}
Bits liveDocs = ctx.reader().getLiveDocs();
BitSet liveDocsBitSet = getSparseBitSetOrNull(liveDocs);
if (liveDocsBitSet == null) {
int numDocs = ctx.reader().numDocs();
// This threshold comes from the previous heuristic that checked whether the BitSet was a SparseFixedBitSet, which uses this
// threshold at creation time. But a higher threshold would likely perform better?
int threshold = ctx.reader().maxDoc() >> 7;
if (numDocs >= threshold) {
BulkScorer bulkScorer = weight.bulkScorer(ctx);
if (bulkScorer != null) {
if (cancellable.isEnabled()) {
Expand All @@ -475,7 +475,7 @@ protected void searchLeaf(LeafReaderContext ctx, int minDocId, int maxDocId, Wei
try {
intersectScorerAndBitSet(
scorer,
liveDocsBitSet,
liveDocs,
leafCollector,
this.cancellable.isEnabled() ? cancellable::checkCancelled : () -> {}
);
Expand All @@ -490,27 +490,10 @@ protected void searchLeaf(LeafReaderContext ctx, int minDocId, int maxDocId, Wei
leafCollector.finish();
}

/**
 * Returns {@code liveDocs} cast to a {@link BitSet} when it is considered sparse, or {@code null} otherwise.
 */
private static BitSet getSparseBitSetOrNull(Bits liveDocs) {
    // Live docs that are themselves a sparse bit set qualify directly.
    if (liveDocs instanceof SparseFixedBitSet) {
        return (BitSet) liveDocs;
    }
    // A combined bit set qualifies only if the underlying role bitset is sparse.
    if (liveDocs instanceof CombinedBitSet && ((CombinedBitSet) liveDocs).getFirst() instanceof SparseFixedBitSet) {
        return (BitSet) liveDocs;
    }
    return null;
}

static void intersectScorerAndBitSet(Scorer scorer, BitSet acceptDocs, LeafCollector collector, Runnable checkCancelled)
static void intersectScorerAndBitSet(Scorer scorer, Bits acceptDocs, LeafCollector collector, Runnable checkCancelled)
throws IOException {
collector.setScorer(scorer);
// ConjunctionDISI uses the DocIdSetIterator#cost() to order the iterators, so if roleBits has the lowest cardinality it should
// be used first:
DocIdSetIterator iterator = ConjunctionUtils.intersectIterators(
Arrays.asList(new BitSetIterator(acceptDocs, acceptDocs.approximateCardinality()), scorer.iterator())
);
DocIdSetIterator iterator = ConjunctionUtils.intersectIterators(Arrays.asList(new BitsIterator(acceptDocs), scorer.iterator()));
int seen = 0;
checkCancelled.run();
for (int docId = iterator.nextDoc(); docId < DocIdSetIterator.NO_MORE_DOCS; docId = iterator.nextDoc()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.common.lucene.search;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.test.ESTestCase;

/**
 * Unit tests for {@link BitsIterator}.
 */
public class BitsIteratorTests extends ESTestCase {

    /**
     * Iterating over bits that are all unset is exhausted on the first call.
     */
    public void testEmpty() {
        BitsIterator iterator = new BitsIterator(new Bits.MatchNoBits(10_000));
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, iterator.nextDoc());
    }

    /**
     * A single set bit is found by both {@code nextDoc()} and {@code advance()}, and nothing is found after it.
     */
    public void testSingleBit() {
        final int setBit = 5000;
        FixedBitSet bits = new FixedBitSet(10_000);
        bits.set(setBit);

        // Plain iteration returns the bit, then reports exhaustion.
        BitsIterator viaNextDoc = new BitsIterator(bits);
        assertEquals(setBit, viaNextDoc.nextDoc());
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, viaNextDoc.nextDoc());

        // Advancing exactly onto the bit returns it.
        BitsIterator ontoBit = new BitsIterator(bits);
        assertEquals(setBit, ontoBit.advance(setBit));

        // Advancing just past the bit finds nothing.
        BitsIterator pastBit = new BitsIterator(bits);
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, pastBit.advance(setBit + 1));
    }

    /**
     * Every even index is set; the iterator must return each of them in order and then report exhaustion.
     */
    public void testEverySecondBit() {
        final int numBits = 10_000;
        FixedBitSet bits = new FixedBitSet(numBits);
        for (int even = 0; even < numBits; even += 2) {
            bits.set(even);
        }

        BitsIterator iterator = new BitsIterator(bits);
        for (int expected = 0; expected < numBits; expected += 2) {
            assertEquals(expected, iterator.nextDoc());
        }
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, iterator.nextDoc());
    }
}
Loading