Skip to content

Commit 8e46154

Browse files
authored
Catch up DLS with recent Lucene changes (#133966)
1 parent cf94ea9 commit 8e46154

File tree

8 files changed

+233
-175
lines changed

8 files changed

+233
-175
lines changed
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.common.lucene.search;
11+
12+
import org.apache.lucene.search.DocIdSetIterator;
13+
import org.apache.lucene.util.Bits;
14+
import org.apache.lucene.util.FixedBitSet;
15+
16+
import java.util.Objects;
17+
18+
/**
19+
* A {@link DocIdSetIterator} over set bits of a {@link Bits} instance.
20+
*/
21+
public final class BitsIterator extends DocIdSetIterator {
22+
23+
private static final int WINDOW_SIZE = 1024;
24+
25+
private final Bits bits;
26+
27+
private int doc = -1;
28+
private final FixedBitSet bitSet;
29+
private int from = 0;
30+
private int to = 0;
31+
32+
public BitsIterator(Bits bits) {
33+
this.bits = Objects.requireNonNull(bits);
34+
// 1024 bits may sound heavy at first sight but it's only a long[16] under the hood
35+
bitSet = new FixedBitSet(WINDOW_SIZE);
36+
}
37+
38+
@Override
39+
public int docID() {
40+
return doc;
41+
}
42+
43+
@Override
44+
public int nextDoc() {
45+
return advance(docID() + 1);
46+
}
47+
48+
@Override
49+
public int advance(int target) {
50+
for (;;) {
51+
if (target >= to) {
52+
if (target >= bits.length()) {
53+
return doc = NO_MORE_DOCS;
54+
}
55+
refill(target);
56+
}
57+
58+
int next = bitSet.nextSetBit(target - from);
59+
if (next != NO_MORE_DOCS) {
60+
return doc = from + next;
61+
} else {
62+
target = to;
63+
}
64+
}
65+
}
66+
67+
private void refill(int target) {
68+
assert target >= to;
69+
from = target;
70+
bitSet.set(0, WINDOW_SIZE);
71+
if (bits.length() - from < WINDOW_SIZE) {
72+
to = bits.length();
73+
bitSet.clear(to - from, WINDOW_SIZE);
74+
} else {
75+
to = from + WINDOW_SIZE;
76+
}
77+
bits.applyMask(bitSet, from);
78+
}
79+
80+
@Override
81+
public long cost() {
82+
// We have no better estimate
83+
return bits.length();
84+
}
85+
}

server/src/main/java/org/elasticsearch/lucene/util/CombinedBitSet.java

Lines changed: 0 additions & 127 deletions
This file was deleted.
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.lucene.util;
11+
12+
import org.apache.lucene.util.Bits;
13+
import org.apache.lucene.util.FixedBitSet;
14+
15+
/**
16+
* A {@link Bits} implementation that combines two {@link Bits} instances by and-ing them to provide a single merged view.
17+
*/
18+
public final class CombinedBits implements Bits {
19+
private final Bits first;
20+
private final Bits second;
21+
private final int length;
22+
23+
public CombinedBits(Bits first, Bits second) {
24+
if (first.length() != second.length()) {
25+
throw new IllegalArgumentException("Provided bits have different lengths: " + first.length() + " != " + second.length());
26+
}
27+
this.first = first;
28+
this.second = second;
29+
this.length = first.length();
30+
}
31+
32+
@Override
33+
public boolean get(int index) {
34+
return first.get(index) && second.get(index);
35+
}
36+
37+
@Override
38+
public int length() {
39+
return length;
40+
}
41+
42+
@Override
43+
public void applyMask(FixedBitSet bitSet, int offset) {
44+
first.applyMask(bitSet, offset);
45+
second.applyMask(bitSet, offset);
46+
}
47+
}

server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java

Lines changed: 9 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,9 @@
3232
import org.apache.lucene.search.TermStatistics;
3333
import org.apache.lucene.search.Weight;
3434
import org.apache.lucene.search.similarities.Similarity;
35-
import org.apache.lucene.util.BitSet;
36-
import org.apache.lucene.util.BitSetIterator;
3735
import org.apache.lucene.util.Bits;
38-
import org.apache.lucene.util.SparseFixedBitSet;
36+
import org.elasticsearch.common.lucene.search.BitsIterator;
3937
import org.elasticsearch.core.Releasable;
40-
import org.elasticsearch.lucene.util.CombinedBitSet;
4138
import org.elasticsearch.search.dfs.AggregatedDfs;
4239
import org.elasticsearch.search.profile.Timer;
4340
import org.elasticsearch.search.profile.query.ProfileWeight;
@@ -454,8 +451,11 @@ protected void searchLeaf(LeafReaderContext ctx, int minDocId, int maxDocId, Wei
454451
return;
455452
}
456453
Bits liveDocs = ctx.reader().getLiveDocs();
457-
BitSet liveDocsBitSet = getSparseBitSetOrNull(liveDocs);
458-
if (liveDocsBitSet == null) {
454+
int numDocs = ctx.reader().numDocs();
455+
// This threshold comes from the previous heuristic that checked whether the BitSet was a SparseFixedBitSet, which uses this
456+
// threshold at creation time. But a higher threshold would likely perform better?
457+
int threshold = ctx.reader().maxDoc() >> 7;
458+
if (numDocs >= threshold) {
459459
BulkScorer bulkScorer = weight.bulkScorer(ctx);
460460
if (bulkScorer != null) {
461461
if (cancellable.isEnabled()) {
@@ -475,7 +475,7 @@ protected void searchLeaf(LeafReaderContext ctx, int minDocId, int maxDocId, Wei
475475
try {
476476
intersectScorerAndBitSet(
477477
scorer,
478-
liveDocsBitSet,
478+
liveDocs,
479479
leafCollector,
480480
this.cancellable.isEnabled() ? cancellable::checkCancelled : () -> {}
481481
);
@@ -490,27 +490,10 @@ protected void searchLeaf(LeafReaderContext ctx, int minDocId, int maxDocId, Wei
490490
leafCollector.finish();
491491
}
492492

493-
private static BitSet getSparseBitSetOrNull(Bits liveDocs) {
494-
if (liveDocs instanceof SparseFixedBitSet) {
495-
return (BitSet) liveDocs;
496-
} else if (liveDocs instanceof CombinedBitSet
497-
// if the underlying role bitset is sparse
498-
&& ((CombinedBitSet) liveDocs).getFirst() instanceof SparseFixedBitSet) {
499-
return (BitSet) liveDocs;
500-
} else {
501-
return null;
502-
}
503-
504-
}
505-
506-
static void intersectScorerAndBitSet(Scorer scorer, BitSet acceptDocs, LeafCollector collector, Runnable checkCancelled)
493+
static void intersectScorerAndBitSet(Scorer scorer, Bits acceptDocs, LeafCollector collector, Runnable checkCancelled)
507494
throws IOException {
508495
collector.setScorer(scorer);
509-
// ConjunctionDISI uses the DocIdSetIterator#cost() to order the iterators, so if roleBits has the lowest cardinality it should
510-
// be used first:
511-
DocIdSetIterator iterator = ConjunctionUtils.intersectIterators(
512-
Arrays.asList(new BitSetIterator(acceptDocs, acceptDocs.approximateCardinality()), scorer.iterator())
513-
);
496+
DocIdSetIterator iterator = ConjunctionUtils.intersectIterators(Arrays.asList(new BitsIterator(acceptDocs), scorer.iterator()));
514497
int seen = 0;
515498
checkCancelled.run();
516499
for (int docId = iterator.nextDoc(); docId < DocIdSetIterator.NO_MORE_DOCS; docId = iterator.nextDoc()) {
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.common.lucene.search;
11+
12+
import org.apache.lucene.search.DocIdSetIterator;
13+
import org.apache.lucene.util.Bits;
14+
import org.apache.lucene.util.FixedBitSet;
15+
import org.elasticsearch.test.ESTestCase;
16+
17+
public class BitsIteratorTests extends ESTestCase {
18+
19+
public void testEmpty() {
20+
Bits bits = new Bits.MatchNoBits(10_000);
21+
BitsIterator iterator = new BitsIterator((bits));
22+
assertEquals(DocIdSetIterator.NO_MORE_DOCS, iterator.nextDoc());
23+
}
24+
25+
public void testSingleBit() {
26+
FixedBitSet bits = new FixedBitSet(10_000);
27+
bits.set(5000);
28+
29+
BitsIterator iterator = new BitsIterator((bits));
30+
assertEquals(5000, iterator.nextDoc());
31+
assertEquals(DocIdSetIterator.NO_MORE_DOCS, iterator.nextDoc());
32+
33+
iterator = new BitsIterator((bits));
34+
assertEquals(5000, iterator.advance(5000));
35+
36+
iterator = new BitsIterator((bits));
37+
assertEquals(DocIdSetIterator.NO_MORE_DOCS, iterator.advance(5001));
38+
}
39+
40+
public void testEverySecondBit() {
41+
FixedBitSet bits = new FixedBitSet(10_000);
42+
for (int i = 0; i < bits.length(); i += 2) {
43+
bits.set(i);
44+
}
45+
BitsIterator iterator = new BitsIterator((bits));
46+
for (int i = 0; i < bits.length(); i += 2) {
47+
assertEquals(i, iterator.nextDoc());
48+
}
49+
assertEquals(DocIdSetIterator.NO_MORE_DOCS, iterator.nextDoc());
50+
}
51+
}

0 commit comments

Comments
 (0)