Skip to content

Commit 9753c56

Browse files
authored
Lucene#asSequentialBits gets the leadCost backwards. (#48335) (#48405)
The comment says it needs random-access, but it passes `Long#MAX_VALUE` as the lead cost, which forces sequential access, it should pass `0` instead. I took advantage of this fix to improve the logic to leverage an estimation of the number of times that `Bits#get` gets called to make better decisions.
1 parent 9e1117f commit 9753c56

File tree

3 files changed

+119
-5
lines changed

3 files changed

+119
-5
lines changed

server/src/main/java/org/elasticsearch/common/lucene/Lucene.java

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -816,16 +816,27 @@ public void delete() {
816816
}
817817

818818
/**
819-
* Given a {@link ScorerSupplier}, return a {@link Bits} instance that will match
820-
* all documents contained in the set. Note that the returned {@link Bits}
821-
* instance MUST be consumed in order.
819+
* Return a {@link Bits} view of the provided scorer.
820+
* <b>NOTE</b>: that the returned {@link Bits} instance MUST be consumed in order.
821+
* @see #asSequentialAccessBits(int, ScorerSupplier, long)
822822
*/
823823
public static Bits asSequentialAccessBits(final int maxDoc, @Nullable ScorerSupplier scorerSupplier) throws IOException {
824+
return asSequentialAccessBits(maxDoc, scorerSupplier, 0L);
825+
}
826+
827+
/**
828+
* Given a {@link ScorerSupplier}, return a {@link Bits} instance that will match
829+
* all documents contained in the set.
830+
* <b>NOTE</b>: that the returned {@link Bits} instance MUST be consumed in order.
831+
* @param estimatedGetCount an estimation of the number of times that {@link Bits#get} will get called
832+
*/
833+
public static Bits asSequentialAccessBits(final int maxDoc, @Nullable ScorerSupplier scorerSupplier,
834+
long estimatedGetCount) throws IOException {
824835
if (scorerSupplier == null) {
825836
return new Bits.MatchNoBits(maxDoc);
826837
}
827838
// Since we want bits, we need random-access
828-
final Scorer scorer = scorerSupplier.get(Long.MAX_VALUE); // this never returns null
839+
final Scorer scorer = scorerSupplier.get(estimatedGetCount); // this never returns null
829840
final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
830841
final DocIdSetIterator iterator;
831842
if (twoPhase == null) {

server/src/main/java/org/elasticsearch/common/lucene/search/function/FunctionScoreQuery.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,14 +269,15 @@ private FunctionFactorScorer functionScorer(LeafReaderContext context) throws IO
269269
if (subQueryScorer == null) {
270270
return null;
271271
}
272+
final long leadCost = subQueryScorer.iterator().cost();
272273
final LeafScoreFunction[] leafFunctions = new LeafScoreFunction[functions.length];
273274
final Bits[] docSets = new Bits[functions.length];
274275
for (int i = 0; i < functions.length; i++) {
275276
ScoreFunction function = functions[i];
276277
leafFunctions[i] = function.getLeafScoreFunction(context);
277278
if (filterWeights[i] != null) {
278279
ScorerSupplier filterScorerSupplier = filterWeights[i].scorerSupplier(context);
279-
docSets[i] = Lucene.asSequentialAccessBits(context.reader().maxDoc(), filterScorerSupplier);
280+
docSets[i] = Lucene.asSequentialAccessBits(context.reader().maxDoc(), filterScorerSupplier, leadCost);
280281
} else {
281282
docSets[i] = new Bits.MatchAllBits(context.reader().maxDoc());
282283
}

server/src/test/java/org/elasticsearch/common/lucene/LuceneTests.java

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,11 @@
2424
import org.apache.lucene.document.Field;
2525
import org.apache.lucene.document.Field.Store;
2626
import org.apache.lucene.document.LatLonDocValuesField;
27+
import org.apache.lucene.document.NumericDocValuesField;
2728
import org.apache.lucene.document.StringField;
2829
import org.apache.lucene.document.TextField;
2930
import org.apache.lucene.index.DirectoryReader;
31+
import org.apache.lucene.index.IndexReader;
3032
import org.apache.lucene.index.IndexWriter;
3133
import org.apache.lucene.index.IndexWriterConfig;
3234
import org.apache.lucene.index.LeafReaderContext;
@@ -36,10 +38,15 @@
3638
import org.apache.lucene.index.SegmentInfos;
3739
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
3840
import org.apache.lucene.index.Term;
41+
import org.apache.lucene.search.Explanation;
42+
import org.apache.lucene.search.IndexOrDocValuesQuery;
3943
import org.apache.lucene.search.IndexSearcher;
4044
import org.apache.lucene.search.MatchAllDocsQuery;
45+
import org.apache.lucene.search.Query;
4146
import org.apache.lucene.search.ScoreDoc;
4247
import org.apache.lucene.search.ScoreMode;
48+
import org.apache.lucene.search.Scorer;
49+
import org.apache.lucene.search.ScorerSupplier;
4350
import org.apache.lucene.search.SortField;
4451
import org.apache.lucene.search.SortedNumericSortField;
4552
import org.apache.lucene.search.SortedSetSelector;
@@ -420,6 +427,101 @@ public void testAsSequentialAccessBits() throws Exception {
420427
dir.close();
421428
}
422429

430+
private static class UnsupportedQuery extends Query {
431+
432+
@Override
433+
public String toString(String field) {
434+
return "Unsupported";
435+
}
436+
437+
@Override
438+
public boolean equals(Object obj) {
439+
return obj instanceof UnsupportedQuery;
440+
}
441+
442+
@Override
443+
public int hashCode() {
444+
return 42;
445+
}
446+
447+
@Override
448+
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
449+
return new Weight(this) {
450+
451+
@Override
452+
public boolean isCacheable(LeafReaderContext ctx) {
453+
return true;
454+
}
455+
456+
@Override
457+
public void extractTerms(Set<Term> terms) {
458+
throw new UnsupportedOperationException();
459+
}
460+
461+
@Override
462+
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
463+
throw new UnsupportedOperationException();
464+
}
465+
466+
@Override
467+
public Scorer scorer(LeafReaderContext context) throws IOException {
468+
throw new UnsupportedOperationException();
469+
}
470+
471+
@Override
472+
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
473+
return new ScorerSupplier() {
474+
475+
@Override
476+
public Scorer get(long leadCost) throws IOException {
477+
throw new UnsupportedOperationException();
478+
}
479+
480+
@Override
481+
public long cost() {
482+
return context.reader().maxDoc();
483+
}
484+
485+
};
486+
}
487+
488+
};
489+
}
490+
491+
}
492+
493+
public void testAsSequentialBitsUsesRandomAccess() throws IOException {
494+
try (Directory dir = newDirectory()) {
495+
try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new KeywordAnalyzer()))) {
496+
Document doc = new Document();
497+
doc.add(new NumericDocValuesField("foo", 5L));
498+
// we need more than 8 documents because doc values are artificially penalized by IndexOrDocValuesQuery
499+
for (int i = 0; i < 10; ++i) {
500+
w.addDocument(doc);
501+
}
502+
w.forceMerge(1);
503+
try (IndexReader reader = DirectoryReader.open(w)) {
504+
IndexSearcher searcher = newSearcher(reader);
505+
searcher.setQueryCache(null);
506+
Query query = new IndexOrDocValuesQuery(
507+
new UnsupportedQuery(), NumericDocValuesField.newSlowRangeQuery("foo", 3L, 5L));
508+
Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, 1f);
509+
510+
// Random access by default
511+
ScorerSupplier scorerSupplier = weight.scorerSupplier(reader.leaves().get(0));
512+
Bits bits = Lucene.asSequentialAccessBits(reader.maxDoc(), scorerSupplier);
513+
assertNotNull(bits);
514+
assertTrue(bits.get(0));
515+
516+
// Moves to sequential access if Bits#get is called more than the number of matches
517+
ScorerSupplier scorerSupplier2 = weight.scorerSupplier(reader.leaves().get(0));
518+
expectThrows(UnsupportedOperationException.class,
519+
() -> Lucene.asSequentialAccessBits(reader.maxDoc(), scorerSupplier2, reader.maxDoc()));
520+
}
521+
}
522+
}
523+
}
524+
423525
/**
424526
* Test that the "unmap hack" is detected as supported by lucene.
425527
* This works around the following bug: https://bugs.openjdk.java.net/browse/JDK-4724038

0 commit comments

Comments
 (0)