Skip to content

Commit 9afcfdb

Browse files
authored
Swap out some simple PriorityQueue subclasses for one using a Comparator (#14705)
1 parent a421480 commit 9afcfdb

File tree

21 files changed

+98
-254
lines changed

21 files changed

+98
-254
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ Other
7070

7171
* GITHUB#14613: Rewrite APIJAR extractor to use Java 24 classfile API and kill ASM dependency also for build system. (Uwe Schindler)
7272

73+
* GITHUB#14705: Use Comparators for some PriorityQueue implementations. (Simon Cooper)
74+
7375
======================= Lucene 10.3.0 =======================
7476

7577
API Changes

lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import java.io.IOException;
2020
import java.nio.file.Paths;
21+
import java.util.Comparator;
2122
import org.apache.lucene.index.DirectoryReader;
2223
import org.apache.lucene.index.IndexReader;
2324
import org.apache.lucene.index.MultiTerms;
@@ -99,7 +100,8 @@ private static String formatQueryAsTrecTopic(
99100
}
100101

101102
private String[] bestTerms(String field, int numTerms) throws IOException {
102-
PriorityQueue<TermDf> pq = new TermsDfQueue(numTerms);
103+
PriorityQueue<TermDf> pq =
104+
PriorityQueue.usingComparator(numTerms, Comparator.comparingInt(tdf -> tdf.df));
103105
IndexReader ir = DirectoryReader.open(dir);
104106
try {
105107
int threshold = ir.maxDoc() / 10; // ignore words too common.
@@ -136,15 +138,4 @@ private static class TermDf {
136138
this.df = freq;
137139
}
138140
}
139-
140-
private static class TermsDfQueue extends PriorityQueue<TermDf> {
141-
TermsDfQueue(int maxSize) {
142-
super(maxSize);
143-
}
144-
145-
@Override
146-
protected boolean lessThan(TermDf tf1, TermDf tf2) {
147-
return tf1.df < tf2.df;
148-
}
149-
}
150141
}

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundFormat.java

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
package org.apache.lucene.codecs.lucene90;
1818

1919
import java.io.IOException;
20+
import java.util.ArrayList;
21+
import java.util.Comparator;
22+
import java.util.List;
2023
import org.apache.lucene.codecs.CodecUtil;
2124
import org.apache.lucene.codecs.CompoundDirectory;
2225
import org.apache.lucene.codecs.CompoundFormat;
@@ -27,7 +30,6 @@
2730
import org.apache.lucene.store.Directory;
2831
import org.apache.lucene.store.IOContext;
2932
import org.apache.lucene.store.IndexOutput;
30-
import org.apache.lucene.util.PriorityQueue;
3133

3234
/**
3335
* Lucene 9.0 compound file format
@@ -105,29 +107,18 @@ public void write(Directory dir, SegmentInfo si, IOContext context) throws IOExc
105107

106108
private record SizedFile(String name, long length) {}
107109

108-
private static class SizedFileQueue extends PriorityQueue<SizedFile> {
109-
SizedFileQueue(int maxSize) {
110-
super(maxSize);
111-
}
112-
113-
@Override
114-
protected boolean lessThan(SizedFile sf1, SizedFile sf2) {
115-
return sf1.length < sf2.length;
116-
}
117-
}
118-
119110
private void writeCompoundFile(
120111
IndexOutput entries, IndexOutput data, Directory dir, SegmentInfo si) throws IOException {
121112
// write number of files
122113
int numFiles = si.files().size();
123114
entries.writeVInt(numFiles);
124115
// first put files in ascending size order so small files are more likely to fit into one page
125-
SizedFileQueue pq = new SizedFileQueue(numFiles);
116+
List<SizedFile> files = new ArrayList<>(numFiles);
126117
for (String filename : si.files()) {
127-
pq.add(new SizedFile(filename, dir.fileLength(filename)));
118+
files.add(new SizedFile(filename, dir.fileLength(filename)));
128119
}
129-
while (pq.size() > 0) {
130-
SizedFile sizedFile = pq.pop();
120+
files.sort(Comparator.comparingLong(SizedFile::length));
121+
for (SizedFile sizedFile : files) {
131122
String file = sizedFile.name;
132123
// align file start offset
133124
long startOffset = data.alignFilePointer(Long.BYTES);

lucene/core/src/java/org/apache/lucene/index/OrdinalMap.java

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -46,18 +46,6 @@ public class OrdinalMap implements Accountable {
4646
// need it
4747
// TODO: use more efficient packed ints structures?
4848

49-
private static class TermsEnumPriorityQueue extends PriorityQueue<TermsEnumIndex> {
50-
51-
TermsEnumPriorityQueue(int size) {
52-
super(size);
53-
}
54-
55-
@Override
56-
protected boolean lessThan(TermsEnumIndex a, TermsEnumIndex b) {
57-
return a.compareTermTo(b) < 0;
58-
}
59-
}
60-
6149
private static class SegmentMap implements Accountable {
6250
private static final long BASE_RAM_BYTES_USED =
6351
RamUsageEstimator.shallowSizeOfInstance(SegmentMap.class);
@@ -265,7 +253,8 @@ public static OrdinalMap build(
265253
long[] segmentOrds = new long[subs.length];
266254

267255
// Just merge-sorts by term:
268-
TermsEnumPriorityQueue queue = new TermsEnumPriorityQueue(subs.length);
256+
PriorityQueue<TermsEnumIndex> queue =
257+
PriorityQueue.usingComparator(subs.length, TermsEnumIndex::compareTermTo);
269258

270259
for (int i = 0; i < subs.length; i++) {
271260
TermsEnumIndex sub = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i);

lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java

Lines changed: 12 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
import java.io.IOException;
2020
import java.util.Collection;
21-
import java.util.Objects;
21+
import java.util.Comparator;
2222
import org.apache.lucene.internal.hppc.LongArrayList;
2323
import org.apache.lucene.util.Bits;
2424
import org.apache.lucene.util.FixedBitSet;
@@ -40,43 +40,14 @@ static class Bucket {
4040
int freq;
4141
}
4242

43-
static final class HeadPriorityQueue extends PriorityQueue<DisiWrapper> {
44-
45-
public HeadPriorityQueue(int maxSize) {
46-
super(maxSize);
47-
}
48-
49-
@Override
50-
protected boolean lessThan(DisiWrapper a, DisiWrapper b) {
51-
return a.doc < b.doc;
52-
}
53-
}
54-
55-
static final class TailPriorityQueue extends PriorityQueue<DisiWrapper> {
56-
57-
public TailPriorityQueue(int maxSize) {
58-
super(maxSize);
59-
}
60-
61-
@Override
62-
protected boolean lessThan(DisiWrapper a, DisiWrapper b) {
63-
return a.cost < b.cost;
64-
}
65-
66-
public DisiWrapper get(int i) {
67-
Objects.checkIndex(i, size());
68-
return (DisiWrapper) getHeapArray()[1 + i];
69-
}
70-
}
71-
7243
// One bucket per doc ID in the window, non-null if scores are needed or if frequencies need to be
7344
// counted
7445
final Bucket[] buckets;
7546
final FixedBitSet matching = new FixedBitSet(SIZE);
7647

7748
final DisiWrapper[] leads;
78-
final HeadPriorityQueue head;
79-
final TailPriorityQueue tail;
49+
final PriorityQueue<DisiWrapper> head;
50+
final PriorityQueue<DisiWrapper> tail;
8051
final Score score = new Score();
8152
final int minShouldMatch;
8253
final long cost;
@@ -101,8 +72,11 @@ public DisiWrapper get(int i) {
10172
buckets = null;
10273
}
10374
this.leads = new DisiWrapper[scorers.size()];
104-
this.head = new HeadPriorityQueue(scorers.size() - minShouldMatch + 1);
105-
this.tail = new TailPriorityQueue(minShouldMatch - 1);
75+
this.head =
76+
PriorityQueue.usingComparator(
77+
scorers.size() - minShouldMatch + 1, Comparator.comparingInt(d -> d.doc));
78+
this.tail =
79+
PriorityQueue.usingComparator(minShouldMatch - 1, Comparator.comparingLong(d -> d.cost));
10680
this.minShouldMatch = minShouldMatch;
10781
this.needsScores = needsScores;
10882
LongArrayList costs = new LongArrayList(scorers.size());
@@ -204,8 +178,8 @@ private void scoreWindowIntoBitSetAndReplay(
204178

205179
private DisiWrapper advance(int min) throws IOException {
206180
assert tail.size() == minShouldMatch - 1;
207-
final HeadPriorityQueue head = this.head;
208-
final TailPriorityQueue tail = this.tail;
181+
final PriorityQueue<DisiWrapper> head = this.head;
182+
final PriorityQueue<DisiWrapper> tail = this.tail;
209183
DisiWrapper headTop = head.top();
210184
DisiWrapper tailTop = tail.top();
211185
while (headTop.doc < min) {
@@ -246,8 +220,8 @@ private void scoreWindowMultipleScorers(
246220

247221
if (maxFreq >= minShouldMatch) {
248222
// There might be matches in other scorers from the tail too
249-
for (int i = 0; i < tail.size(); ++i) {
250-
leads[maxFreq++] = tail.get(i);
223+
for (DisiWrapper disiWrapper : tail) {
224+
leads[maxFreq++] = disiWrapper;
251225
}
252226
tail.clear();
253227

lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxBulkScorer.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import java.io.IOException;
2020
import java.util.Arrays;
21+
import java.util.Comparator;
2122
import java.util.List;
2223
import java.util.Objects;
2324
import org.apache.lucene.util.Bits;
@@ -51,12 +52,7 @@ private static class BulkScorerAndNext {
5152
throw new IllegalArgumentException();
5253
}
5354
this.scorers =
54-
new PriorityQueue<>(scorers.size()) {
55-
@Override
56-
protected boolean lessThan(BulkScorerAndNext a, BulkScorerAndNext b) {
57-
return a.next < b.next;
58-
}
59-
};
55+
PriorityQueue.usingComparator(scorers.size(), Comparator.comparingInt(b -> b.next));
6056
for (BulkScorer scorer : scorers) {
6157
this.scorers.add(new BulkScorerAndNext(scorer));
6258
}

lucene/core/src/java/org/apache/lucene/search/ExactPhraseMatcher.java

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import java.util.ArrayList;
2121
import java.util.Arrays;
2222
import java.util.Collections;
23+
import java.util.Comparator;
2324
import java.util.Iterator;
2425
import java.util.List;
2526
import org.apache.lucene.index.Impact;
@@ -260,12 +261,8 @@ public List<Impact> getImpacts(int level) {
260261
final int docIdUpTo = getDocIdUpTo(level);
261262

262263
PriorityQueue<SubIterator> pq =
263-
new PriorityQueue<>(impacts.length) {
264-
@Override
265-
protected boolean lessThan(SubIterator a, SubIterator b) {
266-
return a.current.freq < b.current.freq;
267-
}
268-
};
264+
PriorityQueue.usingComparator(
265+
impacts.length, Comparator.comparingInt(si -> si.current.freq));
269266

270267
boolean hasImpacts = false;
271268
List<Impact> onlyImpactList = null;

lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import java.util.ArrayList;
2121
import java.util.Arrays;
2222
import java.util.Collection;
23+
import java.util.Comparator;
2324
import java.util.HashMap;
2425
import java.util.List;
2526
import java.util.Map;
@@ -412,7 +413,7 @@ private boolean termArraysEquals(Term[][] termArrays1, Term[][] termArrays2) {
412413
*/
413414
public static class UnionPostingsEnum extends PostingsEnum {
414415
/** queue ordered by docid */
415-
final DocsQueue docsQueue;
416+
final PriorityQueue<PostingsEnum> docsQueue;
416417

417418
/** cost of this enum: sum of its subs */
418419
final long cost;
@@ -427,7 +428,8 @@ public static class UnionPostingsEnum extends PostingsEnum {
427428
final PostingsEnum[] subs;
428429

429430
public UnionPostingsEnum(Collection<PostingsEnum> subs) {
430-
docsQueue = new DocsQueue(subs.size());
431+
docsQueue =
432+
PriorityQueue.usingComparator(subs.size(), Comparator.comparingInt(PostingsEnum::docID));
431433
long cost = 0;
432434
for (PostingsEnum sub : subs) {
433435
docsQueue.add(sub);
@@ -511,18 +513,6 @@ public BytesRef getPayload() throws IOException {
511513
return null; // payloads are unsupported
512514
}
513515

514-
/** disjunction of postings ordered by docid. */
515-
static class DocsQueue extends PriorityQueue<PostingsEnum> {
516-
DocsQueue(int size) {
517-
super(size);
518-
}
519-
520-
@Override
521-
public final boolean lessThan(PostingsEnum a, PostingsEnum b) {
522-
return a.docID() < b.docID();
523-
}
524-
}
525-
526516
/**
527517
* queue of terms for a single document. it's a sorted array of all the positions from all the
528518
* postings
@@ -592,12 +582,7 @@ public static class UnionFullPostingsEnum extends UnionPostingsEnum {
592582
public UnionFullPostingsEnum(List<PostingsEnum> subs) {
593583
super(subs);
594584
this.posQueue =
595-
new PriorityQueue<PostingsAndPosition>(subs.size()) {
596-
@Override
597-
protected boolean lessThan(PostingsAndPosition a, PostingsAndPosition b) {
598-
return a.pos < b.pos;
599-
}
600-
};
585+
PriorityQueue.usingComparator(subs.size(), Comparator.comparingInt(p -> p.pos));
601586
this.subs = new ArrayList<>();
602587
for (PostingsEnum pe : subs) {
603588
this.subs.add(new PostingsAndPosition(pe));

lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreBlendedWrapper.java

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import java.io.IOException;
2020
import java.util.ArrayList;
21+
import java.util.Comparator;
2122
import java.util.List;
2223
import org.apache.lucene.index.LeafReaderContext;
2324
import org.apache.lucene.index.PostingsEnum;
@@ -58,12 +59,8 @@ protected WeightOrDocIdSetIterator rewriteInner(
5859
throws IOException {
5960
DocIdSetBuilder otherTerms = new DocIdSetBuilder(context.reader().maxDoc(), terms);
6061
PriorityQueue<PostingsEnum> highFrequencyTerms =
61-
new PriorityQueue<>(collectedTerms.size()) {
62-
@Override
63-
protected boolean lessThan(PostingsEnum a, PostingsEnum b) {
64-
return a.cost() < b.cost();
65-
}
66-
};
62+
PriorityQueue.usingComparator(
63+
collectedTerms.size(), Comparator.comparingLong(PostingsEnum::cost));
6764

6865
// Handle the already-collected terms:
6966
PostingsEnum reuse = null;

lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package org.apache.lucene.search;
1818

1919
import java.io.IOException;
20+
import java.util.Comparator;
2021
import java.util.stream.LongStream;
2122
import java.util.stream.StreamSupport;
2223
import org.apache.lucene.codecs.lucene103.Lucene103PostingsFormat;
@@ -49,12 +50,7 @@ static long costWithMinShouldMatch(LongStream costs, int numScorers, int minShou
4950
// If we recurse infinitely, we find out that the cost of a msm query is the sum of the
5051
// costs of the num_scorers - minShouldMatch + 1 least costly scorers
5152
final PriorityQueue<Long> pq =
52-
new PriorityQueue<Long>(numScorers - minShouldMatch + 1) {
53-
@Override
54-
protected boolean lessThan(Long a, Long b) {
55-
return a > b;
56-
}
57-
};
53+
PriorityQueue.usingComparator(numScorers - minShouldMatch + 1, Comparator.reverseOrder());
5854
costs.forEach(pq::insertWithOverflow);
5955
return StreamSupport.stream(pq.spliterator(), false).mapToLong(Number::longValue).sum();
6056
}

0 commit comments

Comments
 (0)