Use SkipBlockRangeIterator in TermOrdValComparator (#15696)

romseygeek · web-flow · commit 3f8207849cd7 · 2026-02-12T09:00:37.000Z
DocValuesRangeIterator duplicates a lot of work here: in the worst case
it does no pruning at all, yet forces every value comparison to be done
twice on every document. Switch to SkipBlockRangeIterator instead, which
only checks skip block boundaries and leaves all per-doc checks to the
LeafFieldComparator itself.
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -359,7 +359,7 @@ Optimizations
 
 * GITHUB#15498: ExitableDirectoryReader keeps singleton SortedSetDocValues or SortedNumericDocValues as singletons. (Houston Putman)
 
-* GITHUB#15511: Dynamic pruning for SORTED(_SET) fields with doc values skipper (Pan Guixin)
+* GITHUB#15511, GITHUB#15696: Dynamic pruning for SORTED(_SET) fields with doc values skipper (Pan Guixin, Alan Woodward)
 
 * GITHUB#15560: Avoid unnecessary getGraph() call. (Shubham Chaudhary)
 
diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java
@@ -30,13 +30,12 @@
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.AbstractDocIdSetIterator;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.DocValuesRangeIterator;
 import org.apache.lucene.search.FieldComparator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.LeafFieldComparator;
 import org.apache.lucene.search.Pruning;
 import org.apache.lucene.search.Scorable;
-import org.apache.lucene.search.TwoPhaseIterator;
+import org.apache.lucene.search.SkipBlockRangeIterator;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.PriorityQueue;
@@ -493,7 +492,7 @@ public DocIdSetIterator competitiveIterator() {
 
   private record PostingsEnumAndOrd(PostingsEnum postings, int ord) {}
 
-  private abstract class CompetitiveState {
+  private abstract static class CompetitiveState {
     final UpdateableDocIdSetIterator iterator;
 
     CompetitiveState(LeafReaderContext context) {
@@ -625,41 +624,18 @@ private void init(int minOrd, int maxOrd) throws IOException {
     }
   }
 
-  private class SkipperBasedCompetitiveState extends CompetitiveState {
+  private static class SkipperBasedCompetitiveState extends CompetitiveState {
     private final DocValuesSkipper skipper;
-    private final TwoPhaseIterator innerTwoPhase;
-    private int minOrd;
-    private int maxOrd;
 
-    SkipperBasedCompetitiveState(LeafReaderContext context, DocValuesSkipper skipper)
-        throws IOException {
+    SkipperBasedCompetitiveState(LeafReaderContext context, DocValuesSkipper skipper) {
       super(context);
       this.skipper = skipper;
       this.iterator.update(DocIdSetIterator.all(context.reader().maxDoc()));
-      final SortedDocValues docValues = getSortedDocValues(context, field);
-      this.innerTwoPhase =
-          new TwoPhaseIterator(docValues) {
-            @Override
-            public boolean matches() throws IOException {
-              final int cur = docValues.ordValue();
-              return cur >= minOrd && cur <= maxOrd;
-            }
-
-            @Override
-            public float matchCost() {
-              return 2;
-            }
-          };
     }
 
     @Override
-    public void update(int minOrd, int maxOrd) throws IOException {
-      this.minOrd = minOrd;
-      this.maxOrd = maxOrd;
-
-      final TwoPhaseIterator twoPhaseIterator =
-          new DocValuesRangeIterator(innerTwoPhase, skipper, minOrd, maxOrd, false);
-      iterator.update(TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator));
+    public void update(int minOrd, int maxOrd) {
+      iterator.update(new SkipBlockRangeIterator(skipper, minOrd, maxOrd));
     }
   }
 }
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
@@ -1016,7 +1016,9 @@ private void testStringSortOptimization(
     final int numDocs = atLeast(10000);
     for (int i = 0; i < numDocs; ++i) {
       final Document doc = new Document();
-      final BytesRef value = new BytesRef(Integer.toString(random().nextInt(1000)));
+      // Random values that roughly correlate with index order, to make the skipper
+      // implementation useful
+      final BytesRef value = new BytesRef(Integer.toString(random().nextInt(i, i + 20)));
       doc.add(fieldsBuilder.apply("my_field", value));
       writer.addDocument(doc);
       if (i == 7000) writer.flush(); // multiple segments
@@ -1043,14 +1045,18 @@ private void testStringSortOptimizationWithMissingValues(
     final Directory dir = newDirectory();
     final IndexWriter writer =
         new IndexWriter(dir, new IndexWriterConfig().setMergePolicy(newLogMergePolicy()));
-    final int numDocs = atLeast(10000);
+    // Larger number of documents as some of them are missing values and we still want
+    // to have multiple skipper blocks of 4096 entries.
+    final int numDocs = atLeast(30000);
     // one segment with all values missing to start with
     writer.addDocument(new Document());
     for (int i = 0; i < numDocs - 2; ++i) {
       if (i == 7000) writer.flush(); // multiple segments
       final Document doc = new Document();
       if (random().nextInt(2) == 0) {
-        final BytesRef value = new BytesRef(Integer.toString(random().nextInt(1000)));
+        // Random values that roughly correlate with index order, to make the skipper
+        // implementation useful
+        final BytesRef value = new BytesRef(Integer.toString(random().nextInt(i, i + 20)));
         doc.add(fieldsBuilder.apply("my_field", value));
       }
       writer.addDocument(doc);