Skip to content

Commit b2c92b9

Browse files
authored
Reduce polymorphism of DocIdSetIterator#docID(). (#14453)
This introduces a new `AbstractDocIdSetIterator` abstract class that tracks the current doc IDs and refactors several `DocIdSetIterator` implementations to extend either this `AbstractDocIdSetIterator` or `FilterDocIdSetIterator`, in order to reduce polymorphism of call sites of `DocIdSetIterator#docID()`.
1 parent dda4042 commit b2c92b9

30 files changed

+123
-295
lines changed

lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import java.io.DataInput;
2020
import java.io.IOException;
2121
import org.apache.lucene.index.KnnVectorValues;
22+
import org.apache.lucene.search.AbstractDocIdSetIterator;
2223
import org.apache.lucene.search.DocIdSetIterator;
2324
import org.apache.lucene.store.IndexInput;
2425
import org.apache.lucene.store.IndexOutput;
@@ -91,7 +92,7 @@
9192
*
9293
* @lucene.internal
9394
*/
94-
public final class IndexedDISI extends DocIdSetIterator {
95+
public final class IndexedDISI extends AbstractDocIdSetIterator {
9596

9697
// jump-table time/space trade-offs to consider:
9798
// The block offsets and the block indexes could be stored in more compressed form with
@@ -422,7 +423,6 @@ public static RandomAccessInput createJumpTable(
422423
int nextBlockIndex = -1;
423424
Method method;
424425

425-
int doc = -1;
426426
int index = -1;
427427

428428
// SPARSE variables
@@ -474,11 +474,6 @@ public long cost() {
474474
};
475475
}
476476

477-
@Override
478-
public int docID() {
479-
return doc;
480-
}
481-
482477
@Override
483478
public int advance(int target) throws IOException {
484479
final int targetBlock = target & 0xFFFF0000;
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.lucene.search;
18+
19+
/**
20+
* Abstract implementation of a {@link DocIdSetIterator} that tracks the current doc ID.
21+
* Implementing {@link DocIdSetIterator} by extending either this class or {@link
22+
* FilterDocIdSetIterator} is a good idea in order to reduce polymorphism of call sites to {@link
23+
* DocIdSetIterator#docID()}.
24+
*/
25+
public abstract class AbstractDocIdSetIterator extends DocIdSetIterator {
26+
27+
/** The current doc ID, initialized at -1. */
28+
protected int doc = -1;
29+
30+
/** Sole constructor, invoked by sub-classes. */
31+
protected AbstractDocIdSetIterator() {}
32+
33+
@Override
34+
public final int docID() {
35+
return doc;
36+
}
37+
}

lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -97,21 +97,11 @@ public DocIdSetIterator iterator() {
9797
private DocIdSetIterator approximation() {
9898
final DocIdSetIterator lead = approximations[0];
9999

100-
return new DocIdSetIterator() {
100+
return new FilterDocIdSetIterator(lead) {
101101

102102
float maxScore;
103103
int upTo = -1;
104104

105-
@Override
106-
public int docID() {
107-
return lead.docID();
108-
}
109-
110-
@Override
111-
public long cost() {
112-
return lead.cost();
113-
}
114-
115105
private void moveToNextBlock(int target) throws IOException {
116106
if (minScore == 0) {
117107
upTo = target;

lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java

Lines changed: 6 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
*
3434
* @lucene.internal
3535
*/
36-
final class ConjunctionDISI extends DocIdSetIterator {
36+
final class ConjunctionDISI extends FilterDocIdSetIterator {
3737

3838
/**
3939
* Adds the scorer, possibly splitting up into two phases or collapsing if it is another
@@ -127,6 +127,8 @@ static DocIdSetIterator createConjunction(
127127
if (iterators.size() == 1) {
128128
disi = iterators.get(0);
129129
} else {
130+
// Sort the list first to allow the sparser iterator to lead the matching.
131+
CollectionUtil.timSort(iterators, (o1, o2) -> Long.compare(o1.cost(), o2.cost()));
130132
disi = new ConjunctionDISI(iterators);
131133
}
132134

@@ -152,11 +154,9 @@ private static void throwSubIteratorsNotOnSameDocument() {
152154
final DocIdSetIterator[] others;
153155

154156
private ConjunctionDISI(List<? extends DocIdSetIterator> iterators) {
157+
super(iterators.get(0));
155158
assert iterators.size() >= 2;
156159

157-
// Sort the array the first time to allow the least frequent DocsEnum to
158-
// lead the matching.
159-
CollectionUtil.timSort(iterators, (o1, o2) -> Long.compare(o1.cost(), o2.cost()));
160160
lead1 = iterators.get(0);
161161
lead2 = iterators.get(1);
162162
others = iterators.subList(2, iterators.size()).toArray(new DocIdSetIterator[0]);
@@ -205,23 +205,13 @@ assert assertItersOnSameDoc()
205205
return doNext(lead1.advance(target));
206206
}
207207

208-
@Override
209-
public int docID() {
210-
return lead1.docID();
211-
}
212-
213208
@Override
214209
public int nextDoc() throws IOException {
215210
assert assertItersOnSameDoc()
216211
: "Sub-iterators of ConjunctionDISI are not on the same document!";
217212
return doNext(lead1.nextDoc());
218213
}
219214

220-
@Override
221-
public long cost() {
222-
return lead1.cost(); // overestimate
223-
}
224-
225215
// Returns {@code true} if all sub-iterators are on the same doc ID, {@code false} otherwise
226216
private boolean assertItersOnSameDoc() {
227217
int curDoc = lead1.docID();
@@ -233,14 +223,15 @@ private boolean assertItersOnSameDoc() {
233223
}
234224

235225
/** Conjunction between a {@link DocIdSetIterator} and one or more {@link BitSetIterator}s. */
236-
private static class BitSetConjunctionDISI extends DocIdSetIterator {
226+
private static class BitSetConjunctionDISI extends FilterDocIdSetIterator {
237227

238228
private final DocIdSetIterator lead;
239229
private final BitSetIterator[] bitSetIterators;
240230
private final BitSet[] bitSets;
241231
private final int minLength;
242232

243233
BitSetConjunctionDISI(DocIdSetIterator lead, Collection<BitSetIterator> bitSetIterators) {
234+
super(lead);
244235
this.lead = lead;
245236
assert bitSetIterators.size() > 0;
246237

@@ -257,11 +248,6 @@ private static class BitSetConjunctionDISI extends DocIdSetIterator {
257248
this.minLength = minLen;
258249
}
259250

260-
@Override
261-
public int docID() {
262-
return lead.docID();
263-
}
264-
265251
@Override
266252
public int nextDoc() throws IOException {
267253
assert assertItersOnSameDoc()
@@ -297,11 +283,6 @@ private int doNext(int doc) throws IOException {
297283
}
298284
}
299285

300-
@Override
301-
public long cost() {
302-
return lead.cost();
303-
}
304-
305286
// Returns {@code true} if all sub-iterators are on the same doc ID, {@code false} otherwise
306287
private boolean assertItersOnSameDoc() {
307288
int curDoc = lead.docID();

lucene/core/src/java/org/apache/lucene/search/DisjunctionDISIApproximation.java

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
*
3030
* @lucene.internal
3131
*/
32-
public final class DisjunctionDISIApproximation extends DocIdSetIterator {
32+
public final class DisjunctionDISIApproximation extends AbstractDocIdSetIterator {
3333

3434
public static DisjunctionDISIApproximation of(
3535
Collection<? extends DisiWrapper> subIterators, long leadCost) {
@@ -111,11 +111,6 @@ public long cost() {
111111
return cost;
112112
}
113113

114-
@Override
115-
public int docID() {
116-
return Math.min(minOtherDoc, leadTop.doc);
117-
}
118-
119114
@Override
120115
public int nextDoc() throws IOException {
121116
if (leadTop.doc < minOtherDoc) {
@@ -124,7 +119,7 @@ public int nextDoc() throws IOException {
124119
leadTop.doc = leadTop.approximation.nextDoc();
125120
leadTop = leadIterators.updateTop();
126121
} while (leadTop.doc == curDoc);
127-
return Math.min(leadTop.doc, minOtherDoc);
122+
return doc = Math.min(leadTop.doc, minOtherDoc);
128123
} else {
129124
return advance(minOtherDoc + 1);
130125
}
@@ -145,7 +140,7 @@ public int advance(int target) throws IOException {
145140
minOtherDoc = Math.min(minOtherDoc, w.doc);
146141
}
147142

148-
return Math.min(leadTop.doc, minOtherDoc);
143+
return doc = Math.min(leadTop.doc, minOtherDoc);
149144
}
150145

151146
@Override
@@ -162,6 +157,8 @@ public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOExcept
162157
w.doc = w.approximation.docID();
163158
minOtherDoc = Math.min(minOtherDoc, w.doc);
164159
}
160+
161+
doc = Math.min(leadTop.doc, minOtherDoc);
165162
}
166163

167164
/** Return the linked list of iterators positioned on the current doc. */

lucene/core/src/java/org/apache/lucene/search/DocIdSetIterator.java

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,9 @@ public static DocIdSetIterator range(int minDoc, int maxDoc) {
5555
return new RangeDocIdSetIterator(minDoc, maxDoc);
5656
}
5757

58-
private static class RangeDocIdSetIterator extends DocIdSetIterator {
58+
private static class RangeDocIdSetIterator extends AbstractDocIdSetIterator {
5959

6060
private final int minDoc, maxDoc;
61-
private int doc = -1;
6261

6362
RangeDocIdSetIterator(int minDoc, int maxDoc) {
6463
// advance relies on minDoc <= maxDoc for correctness
@@ -67,11 +66,6 @@ private static class RangeDocIdSetIterator extends DocIdSetIterator {
6766
this.maxDoc = maxDoc;
6867
}
6968

70-
@Override
71-
public int docID() {
72-
return doc;
73-
}
74-
7569
@Override
7670
public int nextDoc() {
7771
return advance(doc + 1);

lucene/core/src/java/org/apache/lucene/search/DocValuesRangeIterator.java

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,14 @@ public DocValuesRangeIterator(
5757
this.innerTwoPhase = twoPhase;
5858
}
5959

60-
abstract static class Approximation extends DocIdSetIterator {
60+
abstract static class Approximation extends AbstractDocIdSetIterator {
6161

6262
private final DocIdSetIterator innerApproximation;
6363

6464
protected final DocValuesSkipper skipper;
6565
protected final long lowerValue;
6666
protected final long upperValue;
6767

68-
private int doc = -1;
69-
7068
// Track a decision for all doc IDs between the current doc ID and upTo inclusive.
7169
Match match = Match.MAYBE;
7270
int upTo = -1;
@@ -82,11 +80,6 @@ abstract static class Approximation extends DocIdSetIterator {
8280
this.upperValue = upperValue;
8381
}
8482

85-
@Override
86-
public int docID() {
87-
return doc;
88-
}
89-
9083
@Override
9184
public int nextDoc() throws IOException {
9285
return advance(docID() + 1);

lucene/core/src/java/org/apache/lucene/search/FilteredDocIdSetIterator.java

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,8 @@
2222
* Abstract decorator class of a DocIdSetIterator implementation that provides on-demand
2323
* filter/validation mechanism on an underlying DocIdSetIterator.
2424
*/
25-
public abstract class FilteredDocIdSetIterator extends DocIdSetIterator {
25+
public abstract class FilteredDocIdSetIterator extends AbstractDocIdSetIterator {
2626
protected DocIdSetIterator _innerIter;
27-
private int doc;
2827

2928
/**
3029
* Constructor.
@@ -53,11 +52,6 @@ public DocIdSetIterator getDelegate() {
5352
*/
5453
protected abstract boolean match(int doc) throws IOException;
5554

56-
@Override
57-
public int docID() {
58-
return doc;
59-
}
60-
6155
@Override
6256
public int nextDoc() throws IOException {
6357
while ((doc = _innerIter.nextDoc()) != NO_MORE_DOCS) {

lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,8 @@
2525
*
2626
* @lucene.internal
2727
*/
28-
public final class ImpactsDISI extends DocIdSetIterator {
28+
public final class ImpactsDISI extends FilterDocIdSetIterator {
2929

30-
private final DocIdSetIterator in;
3130
private final MaxScoreCache maxScoreCache;
3231
private float minCompetitiveScore = 0;
3332
private int upTo = DocIdSetIterator.NO_MORE_DOCS;
@@ -40,7 +39,7 @@ public final class ImpactsDISI extends DocIdSetIterator {
4039
* @param maxScoreCache the cache of maximum scores, typically computed from the same ImpactsEnum
4140
*/
4241
public ImpactsDISI(DocIdSetIterator in, MaxScoreCache maxScoreCache) {
43-
this.in = in;
42+
super(in);
4443
this.maxScoreCache = maxScoreCache;
4544
}
4645

@@ -112,14 +111,4 @@ public int nextDoc() throws IOException {
112111
}
113112
return advance(in.docID() + 1);
114113
}
115-
116-
@Override
117-
public int docID() {
118-
return in.docID();
119-
}
120-
121-
@Override
122-
public long cost() {
123-
return in.cost();
124-
}
125114
}

lucene/core/src/java/org/apache/lucene/search/IndexSortSortedNumericDocValuesRangeQuery.java

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -685,28 +685,21 @@ static IteratorAndCount sparseRange(int minDoc, int maxDoc, DocIdSetIterator del
685685
* A doc ID set iterator that wraps a delegate iterator and only returns doc IDs in the range
686686
* [firstDocInclusive, lastDoc).
687687
*/
688-
private static class BoundedDocIdSetIterator extends DocIdSetIterator {
688+
private static class BoundedDocIdSetIterator extends AbstractDocIdSetIterator {
689689
private final int firstDoc;
690690
private final int lastDoc;
691691
private final DocIdSetIterator delegate;
692692

693-
private int docID = -1;
694-
695693
BoundedDocIdSetIterator(int firstDoc, int lastDoc, DocIdSetIterator delegate) {
696694
assert delegate != null;
697695
this.firstDoc = firstDoc;
698696
this.lastDoc = lastDoc;
699697
this.delegate = delegate;
700698
}
701699

702-
@Override
703-
public int docID() {
704-
return docID;
705-
}
706-
707700
@Override
708701
public int nextDoc() throws IOException {
709-
return advance(docID + 1);
702+
return advance(doc + 1);
710703
}
711704

712705
@Override
@@ -717,11 +710,11 @@ public int advance(int target) throws IOException {
717710

718711
int result = delegate.advance(target);
719712
if (result < lastDoc) {
720-
docID = result;
713+
doc = result;
721714
} else {
722-
docID = NO_MORE_DOCS;
715+
doc = NO_MORE_DOCS;
723716
}
724-
return docID;
717+
return doc;
725718
}
726719

727720
@Override

0 commit comments

Comments
 (0)