Skip to content

Commit 3f82078

Browse files
authored
Use SkipBlockRangeIterator in TermOrdValComparator (#15696)
DocValuesRangeIterator will duplicate lots of work here, in the worst case not doing any pruning at all but forcing all value comparisons to be done twice on every document. Switch to using SkipBlockRangeIterator instead, which only checks skip block boundaries and leaves all per-doc checks to the LeafComparator itself.
1 parent 448c7d8 commit 3f82078

File tree

3 files changed

+16
-34
lines changed

3 files changed

+16
-34
lines changed

lucene/CHANGES.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ Optimizations
359359

360360
* GITHUB#15498: ExitableDirectoryReader keeps singleton SortedSetDocValues or SortedNumericDocValues as singletons. (Houston Putman)
361361

362-
* GITHUB#15511: Dynamic pruning for SORTED(_SET) fields with doc values skipper (Pan Guixin)
362+
* GITHUB#15511, GITHUB#15696: Dynamic pruning for SORTED(_SET) fields with doc values skipper (Pan Guixin, Alan Woodward)
363363

364364
* GITHUB#15560: Avoid unnecessary getGraph() call. (Shubham Chaudhary)
365365

lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java

Lines changed: 6 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,12 @@
3030
import org.apache.lucene.index.TermsEnum;
3131
import org.apache.lucene.search.AbstractDocIdSetIterator;
3232
import org.apache.lucene.search.DocIdSetIterator;
33-
import org.apache.lucene.search.DocValuesRangeIterator;
3433
import org.apache.lucene.search.FieldComparator;
3534
import org.apache.lucene.search.IndexSearcher;
3635
import org.apache.lucene.search.LeafFieldComparator;
3736
import org.apache.lucene.search.Pruning;
3837
import org.apache.lucene.search.Scorable;
39-
import org.apache.lucene.search.TwoPhaseIterator;
38+
import org.apache.lucene.search.SkipBlockRangeIterator;
4039
import org.apache.lucene.util.BytesRef;
4140
import org.apache.lucene.util.BytesRefBuilder;
4241
import org.apache.lucene.util.PriorityQueue;
@@ -493,7 +492,7 @@ public DocIdSetIterator competitiveIterator() {
493492

494493
private record PostingsEnumAndOrd(PostingsEnum postings, int ord) {}
495494

496-
private abstract class CompetitiveState {
495+
private abstract static class CompetitiveState {
497496
final UpdateableDocIdSetIterator iterator;
498497

499498
CompetitiveState(LeafReaderContext context) {
@@ -625,41 +624,18 @@ private void init(int minOrd, int maxOrd) throws IOException {
625624
}
626625
}
627626

628-
private class SkipperBasedCompetitiveState extends CompetitiveState {
627+
private static class SkipperBasedCompetitiveState extends CompetitiveState {
629628
private final DocValuesSkipper skipper;
630-
private final TwoPhaseIterator innerTwoPhase;
631-
private int minOrd;
632-
private int maxOrd;
633629

634-
SkipperBasedCompetitiveState(LeafReaderContext context, DocValuesSkipper skipper)
635-
throws IOException {
630+
SkipperBasedCompetitiveState(LeafReaderContext context, DocValuesSkipper skipper) {
636631
super(context);
637632
this.skipper = skipper;
638633
this.iterator.update(DocIdSetIterator.all(context.reader().maxDoc()));
639-
final SortedDocValues docValues = getSortedDocValues(context, field);
640-
this.innerTwoPhase =
641-
new TwoPhaseIterator(docValues) {
642-
@Override
643-
public boolean matches() throws IOException {
644-
final int cur = docValues.ordValue();
645-
return cur >= minOrd && cur <= maxOrd;
646-
}
647-
648-
@Override
649-
public float matchCost() {
650-
return 2;
651-
}
652-
};
653634
}
654635

655636
@Override
656-
public void update(int minOrd, int maxOrd) throws IOException {
657-
this.minOrd = minOrd;
658-
this.maxOrd = maxOrd;
659-
660-
final TwoPhaseIterator twoPhaseIterator =
661-
new DocValuesRangeIterator(innerTwoPhase, skipper, minOrd, maxOrd, false);
662-
iterator.update(TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator));
637+
public void update(int minOrd, int maxOrd) {
638+
iterator.update(new SkipBlockRangeIterator(skipper, minOrd, maxOrd));
663639
}
664640
}
665641
}

lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1016,7 +1016,9 @@ private void testStringSortOptimization(
10161016
final int numDocs = atLeast(10000);
10171017
for (int i = 0; i < numDocs; ++i) {
10181018
final Document doc = new Document();
1019-
final BytesRef value = new BytesRef(Integer.toString(random().nextInt(1000)));
1019+
// Random values that roughly correlate with index order, to make skipper implementation
1020+
// useful
1021+
final BytesRef value = new BytesRef(Integer.toString(random().nextInt(i, i + 20)));
10201022
doc.add(fieldsBuilder.apply("my_field", value));
10211023
writer.addDocument(doc);
10221024
if (i == 7000) writer.flush(); // multiple segments
@@ -1043,14 +1045,18 @@ private void testStringSortOptimizationWithMissingValues(
10431045
final Directory dir = newDirectory();
10441046
final IndexWriter writer =
10451047
new IndexWriter(dir, new IndexWriterConfig().setMergePolicy(newLogMergePolicy()));
1046-
final int numDocs = atLeast(10000);
1048+
// Larger number of documents as some of them are missing values and we still want
1049+
// to have multiple skipper blocks of 4096 entries.
1050+
final int numDocs = atLeast(30000);
10471051
// one segment with all values missing to start with
10481052
writer.addDocument(new Document());
10491053
for (int i = 0; i < numDocs - 2; ++i) {
10501054
if (i == 7000) writer.flush(); // multiple segments
10511055
final Document doc = new Document();
10521056
if (random().nextInt(2) == 0) {
1053-
final BytesRef value = new BytesRef(Integer.toString(random().nextInt(1000)));
1057+
// Random values that roughly correlate with index order, to make skipper implementation
1058+
// useful
1059+
final BytesRef value = new BytesRef(Integer.toString(random().nextInt(i, i + 20)));
10541060
doc.add(fieldsBuilder.apply("my_field", value));
10551061
}
10561062
writer.addDocument(doc);

0 commit comments

Comments
 (0)