diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 29913a822fe2..a432b0423378 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -70,6 +70,8 @@ Other * GITHUB#14613: Rewrite APIJAR extractor to use Java 24 classfile API and kill ASM dependency also for build system. (Uwe Schindler) +* GITHUB#14705: Use Comparators for some PriorityQueue implementations. (Simon Cooper) + ======================= Lucene 10.3.0 ======================= API Changes diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java index 8de01bf74b18..8c078820dbf9 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.nio.file.Paths; +import java.util.Comparator; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiTerms; @@ -99,7 +100,8 @@ private static String formatQueryAsTrecTopic( } private String[] bestTerms(String field, int numTerms) throws IOException { - PriorityQueue pq = new TermsDfQueue(numTerms); + PriorityQueue pq = + PriorityQueue.usingComparator(numTerms, Comparator.comparingInt(tdf -> tdf.df)); IndexReader ir = DirectoryReader.open(dir); try { int threshold = ir.maxDoc() / 10; // ignore words too common. @@ -136,15 +138,4 @@ private static class TermDf { this.df = freq; } } - - private static class TermsDfQueue extends PriorityQueue { - TermsDfQueue(int maxSize) { - super(maxSize); - } - - @Override - protected boolean lessThan(TermDf tf1, TermDf tf2) { - return tf1.df < tf2.df; - } - } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundFormat.java index 80b98e0a4c52..67200bea1028 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundFormat.java @@ -17,6 +17,9 @@ package org.apache.lucene.codecs.lucene90; import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.CompoundDirectory; import org.apache.lucene.codecs.CompoundFormat; @@ -27,7 +30,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.PriorityQueue; /** * Lucene 9.0 compound file format @@ -105,29 +107,18 @@ public void write(Directory dir, SegmentInfo si, IOContext context) throws IOExc private record SizedFile(String name, long length) {} - private static class SizedFileQueue extends PriorityQueue { - SizedFileQueue(int maxSize) { - super(maxSize); - } - - @Override - protected boolean lessThan(SizedFile sf1, SizedFile sf2) { - return sf1.length < sf2.length; - } - } - private void writeCompoundFile( IndexOutput entries, IndexOutput data, Directory dir, SegmentInfo si) throws IOException { // write number of files int numFiles = si.files().size(); entries.writeVInt(numFiles); // first put files in ascending size order so small files fit more likely into one page - SizedFileQueue pq = new SizedFileQueue(numFiles); + List files = new ArrayList<>(numFiles); for (String filename : si.files()) { - pq.add(new SizedFile(filename, dir.fileLength(filename))); + files.add(new SizedFile(filename, dir.fileLength(filename))); } - while (pq.size() > 0) { - SizedFile sizedFile = pq.pop(); + files.sort(Comparator.comparingLong(SizedFile::length)); + for (SizedFile sizedFile : files) { String file = sizedFile.name; // align file start offset long startOffset = data.alignFilePointer(Long.BYTES); diff --git a/lucene/core/src/java/org/apache/lucene/index/OrdinalMap.java b/lucene/core/src/java/org/apache/lucene/index/OrdinalMap.java index 5785c5dc938f..1ab5f014f398 100644 --- a/lucene/core/src/java/org/apache/lucene/index/OrdinalMap.java +++ b/lucene/core/src/java/org/apache/lucene/index/OrdinalMap.java @@ -46,18 +46,6 @@ public class OrdinalMap implements Accountable { // need it // TODO: use more efficient packed ints structures? - private static class TermsEnumPriorityQueue extends PriorityQueue { - - TermsEnumPriorityQueue(int size) { - super(size); - } - - @Override - protected boolean lessThan(TermsEnumIndex a, TermsEnumIndex b) { - return a.compareTermTo(b) < 0; - } - } - private static class SegmentMap implements Accountable { private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SegmentMap.class); @@ -265,7 +253,8 @@ public static OrdinalMap build( long[] segmentOrds = new long[subs.length]; // Just merge-sorts by term: - TermsEnumPriorityQueue queue = new TermsEnumPriorityQueue(subs.length); + PriorityQueue queue = + PriorityQueue.usingComparator(subs.length, TermsEnumIndex::compareTermTo); for (int i = 0; i < subs.length; i++) { TermsEnumIndex sub = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i); diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java index 9bb2ec7882ae..766e448ab078 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java @@ -18,7 +18,7 @@ import java.io.IOException; import java.util.Collection; -import java.util.Objects; +import java.util.Comparator; import org.apache.lucene.internal.hppc.LongArrayList; import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; @@ -40,43 +40,14 @@ static class Bucket { int freq; } - static final class HeadPriorityQueue extends PriorityQueue { - - public HeadPriorityQueue(int maxSize) { - super(maxSize); - } - - @Override - protected boolean lessThan(DisiWrapper a, DisiWrapper b) { - return a.doc < b.doc; - } - } - - static final class TailPriorityQueue extends PriorityQueue { - - public TailPriorityQueue(int maxSize) { - super(maxSize); - } - - @Override - protected boolean lessThan(DisiWrapper a, DisiWrapper b) { - return a.cost < b.cost; - } - - public DisiWrapper get(int i) { - Objects.checkIndex(i, size()); - return (DisiWrapper) getHeapArray()[1 + i]; - } - } - // One bucket per doc ID in the window, non-null if scores are needed or if frequencies need to be // counted final Bucket[] buckets; final FixedBitSet matching = new FixedBitSet(SIZE); final DisiWrapper[] leads; - final HeadPriorityQueue head; - final TailPriorityQueue tail; + final PriorityQueue head; + final PriorityQueue tail; final Score score = new Score(); final int minShouldMatch; final long cost; @@ -101,8 +72,11 @@ public DisiWrapper get(int i) { buckets = null; } this.leads = new DisiWrapper[scorers.size()]; - this.head = new HeadPriorityQueue(scorers.size() - minShouldMatch + 1); - this.tail = new TailPriorityQueue(minShouldMatch - 1); + this.head = + PriorityQueue.usingComparator( + scorers.size() - minShouldMatch + 1, Comparator.comparingInt(d -> d.doc)); + this.tail = + PriorityQueue.usingComparator(minShouldMatch - 1, Comparator.comparingLong(d -> d.cost)); this.minShouldMatch = minShouldMatch; this.needsScores = needsScores; LongArrayList costs = new LongArrayList(scorers.size()); @@ -204,8 +178,8 @@ private void scoreWindowIntoBitSetAndReplay( private DisiWrapper advance(int min) throws IOException { assert tail.size() == minShouldMatch - 1; - final HeadPriorityQueue head = this.head; - final TailPriorityQueue tail = this.tail; + final PriorityQueue head = this.head; + final PriorityQueue tail = this.tail; DisiWrapper headTop = head.top(); DisiWrapper tailTop = tail.top(); while (headTop.doc < min) { @@ -246,8 +220,8 @@ private void scoreWindowMultipleScorers( if (maxFreq >= minShouldMatch) { // There might be matches in other scorers from the tail too - for (int i = 0; i < tail.size(); ++i) { - leads[maxFreq++] = tail.get(i); + for (DisiWrapper disiWrapper : tail) { + leads[maxFreq++] = disiWrapper; } tail.clear(); diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxBulkScorer.java index 78184b48eda8..e6dc70502709 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxBulkScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxBulkScorer.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.util.Arrays; +import java.util.Comparator; import java.util.List; import java.util.Objects; import org.apache.lucene.util.Bits; @@ -51,12 +52,7 @@ private static class BulkScorerAndNext { throw new IllegalArgumentException(); } this.scorers = - new PriorityQueue<>(scorers.size()) { - @Override - protected boolean lessThan(BulkScorerAndNext a, BulkScorerAndNext b) { - return a.next < b.next; - } - }; + PriorityQueue.usingComparator(scorers.size(), Comparator.comparingInt(b -> b.next)); for (BulkScorer scorer : scorers) { this.scorers.add(new BulkScorerAndNext(scorer)); } diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseMatcher.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseMatcher.java index 79c7d2f418f5..170f7fa61c8c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseMatcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseMatcher.java @@ -20,6 +20,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Comparator; import java.util.Iterator; import java.util.List; import org.apache.lucene.index.Impact; @@ -260,12 +261,8 @@ public List getImpacts(int level) { final int docIdUpTo = getDocIdUpTo(level); PriorityQueue pq = - new PriorityQueue<>(impacts.length) { - @Override - protected boolean lessThan(SubIterator a, SubIterator b) { - return a.current.freq < b.current.freq; - } - }; + PriorityQueue.usingComparator( + impacts.length, Comparator.comparingInt(si -> si.current.freq)); boolean hasImpacts = false; List onlyImpactList = null; diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 83510c029912..d1d4c7063de3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -20,6 +20,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -412,7 +413,7 @@ private boolean termArraysEquals(Term[][] termArrays1, Term[][] termArrays2) { */ public static class UnionPostingsEnum extends PostingsEnum { /** queue ordered by docid */ - final DocsQueue docsQueue; + final PriorityQueue docsQueue; /** cost of this enum: sum of its subs */ final long cost; @@ -427,7 +428,8 @@ public static class UnionPostingsEnum extends PostingsEnum { final PostingsEnum[] subs; public UnionPostingsEnum(Collection subs) { - docsQueue = new DocsQueue(subs.size()); + docsQueue = + PriorityQueue.usingComparator(subs.size(), Comparator.comparingInt(PostingsEnum::docID)); long cost = 0; for (PostingsEnum sub : subs) { docsQueue.add(sub); @@ -511,18 +513,6 @@ public BytesRef getPayload() throws IOException { return null; // payloads are unsupported } - /** disjunction of postings ordered by docid. */ - static class DocsQueue extends PriorityQueue { - DocsQueue(int size) { - super(size); - } - - @Override - public final boolean lessThan(PostingsEnum a, PostingsEnum b) { - return a.docID() < b.docID(); - } - } - /** * queue of terms for a single document. its a sorted array of all the positions from all the * postings @@ -592,12 +582,7 @@ public static class UnionFullPostingsEnum extends UnionPostingsEnum { public UnionFullPostingsEnum(List subs) { super(subs); this.posQueue = - new PriorityQueue(subs.size()) { - @Override - protected boolean lessThan(PostingsAndPosition a, PostingsAndPosition b) { - return a.pos < b.pos; - } - }; + PriorityQueue.usingComparator(subs.size(), Comparator.comparingInt(p -> p.pos)); this.subs = new ArrayList<>(); for (PostingsEnum pe : subs) { this.subs.add(new PostingsAndPosition(pe)); diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreBlendedWrapper.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreBlendedWrapper.java index b4b81f66e6c1..b35a17f3954f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreBlendedWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreBlendedWrapper.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Comparator; import java.util.List; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; @@ -58,12 +59,8 @@ protected WeightOrDocIdSetIterator rewriteInner( throws IOException { DocIdSetBuilder otherTerms = new DocIdSetBuilder(context.reader().maxDoc(), terms); PriorityQueue highFrequencyTerms = - new PriorityQueue<>(collectedTerms.size()) { - @Override - protected boolean lessThan(PostingsEnum a, PostingsEnum b) { - return a.cost() < b.cost(); - } - }; + PriorityQueue.usingComparator( + collectedTerms.size(), Comparator.comparingLong(PostingsEnum::cost)); // Handle the already-collected terms: PostingsEnum reuse = null; diff --git a/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java b/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java index 082ffd469de9..19a42b5b29dd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java @@ -17,6 +17,7 @@ package org.apache.lucene.search; import java.io.IOException; +import java.util.Comparator; import java.util.stream.LongStream; import java.util.stream.StreamSupport; import org.apache.lucene.codecs.lucene103.Lucene103PostingsFormat; @@ -49,12 +50,7 @@ static long costWithMinShouldMatch(LongStream costs, int numScorers, int minShou // If we recurse infinitely, we find out that the cost of a msm query is the sum of the // costs of the num_scorers - minShouldMatch + 1 least costly scorers final PriorityQueue pq = - new PriorityQueue(numScorers - minShouldMatch + 1) { - @Override - protected boolean lessThan(Long a, Long b) { - return a > b; - } - }; + PriorityQueue.usingComparator(numScorers - minShouldMatch + 1, Comparator.reverseOrder()); costs.forEach(pq::insertWithOverflow); return StreamSupport.stream(pq.spliterator(), false).mapToLong(Number::longValue).sum(); } diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java index ae21efe76c88..e97d8eb97bdb 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.util.ArrayDeque; +import java.util.Comparator; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexOptions; @@ -585,12 +586,7 @@ private void init(int minOrd, int maxOrd) throws IOException { } } disjunction = - new PriorityQueue(size) { - @Override - protected boolean lessThan(PostingsEnumAndOrd a, PostingsEnumAndOrd b) { - return a.postings.docID() < b.postings.docID(); - } - }; + PriorityQueue.usingComparator(size, Comparator.comparingInt(p -> p.postings.docID())); disjunction.addAll(postings); } } diff --git a/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java b/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java index 61db0db1c42c..c22236a7a218 100644 --- a/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java +++ b/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java @@ -687,12 +687,8 @@ public Partition call() throws IOException { } PriorityQueue queue = - new PriorityQueue<>(segmentsToMerge.size()) { - @Override - protected boolean lessThan(FileAndTop a, FileAndTop b) { - return comparator.compare(a.current, b.current) < 0; - } - }; + PriorityQueue.usingComparator( + segmentsToMerge.size(), Comparator.comparing(ft -> ft.current, comparator)); ByteSequencesReader[] streams = new ByteSequencesReader[segmentsToMerge.size()]; diff --git a/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java b/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java index 4760256d8ee9..7900b0f5bc7b 100644 --- a/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java +++ b/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java @@ -16,7 +16,9 @@ */ package org.apache.lucene.util; +import java.util.Arrays; import java.util.Collection; +import java.util.Comparator; import java.util.Iterator; import java.util.NoSuchElementException; import java.util.function.Supplier; @@ -34,6 +36,17 @@ * @lucene.internal */ public abstract class PriorityQueue implements Iterable { + + public static PriorityQueue usingComparator( + int maxSize, Comparator comparator) { + return new PriorityQueue<>(maxSize) { + @Override + protected boolean lessThan(T a, T b) { + return comparator.compare(a, b) < 0; + } + }; + } + private int size = 0; private final int maxSize; private final T[] heap; @@ -242,9 +255,7 @@ public final int size() { /** Removes all entries from the PriorityQueue. */ public final void clear() { - for (int i = 0; i <= size; i++) { - heap[i] = null; - } + Arrays.fill(heap, 0, size + 1, null); size = 0; } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java b/lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java index dc7f2feb9e09..19246b5c1bc0 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java @@ -101,11 +101,16 @@ protected boolean lessThan(Value a, Value b) { } public void testPQ() throws Exception { - testPQ(atLeast(10000), random()); + int size = atLeast(10000); + testPQ(new IntegerQueue(size), size, random()); } - public static void testPQ(int count, Random gen) { - PriorityQueue pq = new IntegerQueue(count); + public void testComparatorPQ() throws Exception { + int size = atLeast(10000); + testPQ(PriorityQueue.usingComparator(size, Integer::compareTo), size, random()); + } + + public static void testPQ(PriorityQueue pq, int count, Random gen) { int sum = 0, sum2 = 0; for (int i = 0; i < count; i++) { diff --git a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java index 3a5bfc3c4649..42b9f8b8855f 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java @@ -231,12 +231,7 @@ private void doQueryFirstScoring(Bits acceptDocs, LeafCollector collector, DocsA // single-valued PQ ordered by docID to easily determine the "closest" runaway dim we'll use // for advancing in the case that multiple dim approximations miss. PriorityQueue runawayDim = - new PriorityQueue<>(1) { - @Override - protected boolean lessThan(DocsAndCost a, DocsAndCost b) { - return a.approximation.docID() < b.approximation.docID(); - } - }; + PriorityQueue.usingComparator(1, Comparator.comparingInt(dc -> dc.approximation.docID())); int docID = baseApproximation.docID(); diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupFacetCollector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupFacetCollector.java index e598ebdcd12a..2cd78209f288 100644 --- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupFacetCollector.java +++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupFacetCollector.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Comparator; import java.util.LinkedList; import java.util.List; import java.util.NavigableSet; @@ -68,7 +69,9 @@ public GroupedFacetResult mergeSegmentResults(int size, int minCount, boolean or throws IOException { int totalCount = 0; int missingCount = 0; - SegmentResultPriorityQueue segments = new SegmentResultPriorityQueue(segmentResults.size()); + PriorityQueue segments = + PriorityQueue.usingComparator( + segmentResults.size(), Comparator.comparing(sr -> sr.mergeTerm)); for (SegmentResult segmentResult : segmentResults) { missingCount += segmentResult.missing; if (segmentResult.mergePos >= segmentResult.maxTermPos) { @@ -253,16 +256,4 @@ protected SegmentResult(int[] counts, int total, int missing, int maxTermPos) { */ protected abstract void nextTerm() throws IOException; } - - private static class SegmentResultPriorityQueue extends PriorityQueue { - - SegmentResultPriorityQueue(int maxSize) { - super(maxSize); - } - - @Override - protected boolean lessThan(SegmentResult a, SegmentResult b) { - return a.mergeTerm.compareTo(b.mergeTerm) < 0; - } - } } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/matchhighlight/PassageSelector.java b/lucene/highlighter/src/java/org/apache/lucene/search/matchhighlight/PassageSelector.java index 05cba2f1b1e7..b3d929e3e246 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/matchhighlight/PassageSelector.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/matchhighlight/PassageSelector.java @@ -93,13 +93,7 @@ public List pickBest( int pqSize = Math.max(16, maxPassages); // Best passages so far. - PriorityQueue pq = - new PriorityQueue<>(pqSize) { - @Override - protected boolean lessThan(Passage a, Passage b) { - return passageScorer.compare(a, b) < 0; - } - }; + PriorityQueue pq = PriorityQueue.usingComparator(pqSize, passageScorer); markers = splitOrTruncateToWindows(markers, maxPassageWindow, permittedPassageRanges); diff --git a/lucene/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java b/lucene/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java index 9633e92ac7e7..0da7f5d9510b 100644 --- a/lucene/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java +++ b/lucene/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java @@ -99,7 +99,7 @@ private static void usage() { public static TermStats[] getHighFreqTerms( IndexReader reader, int numTerms, String field, Comparator comparator) throws Exception { - TermStatsQueue tiq = null; + PriorityQueue tiq = null; if (field != null) { Terms terms = MultiTerms.getTerms(reader, field); @@ -108,18 +108,18 @@ public static TermStats[] getHighFreqTerms( } TermsEnum termsEnum = terms.iterator(); - tiq = new TermStatsQueue(numTerms, comparator); - tiq.fill(field, termsEnum); + tiq = PriorityQueue.usingComparator(numTerms, comparator); + fill(tiq, field, termsEnum); } else { Collection fields = FieldInfos.getIndexedFields(reader); - if (fields.size() == 0) { + if (fields.isEmpty()) { throw new RuntimeException("no fields found for this index"); } - tiq = new TermStatsQueue(numTerms, comparator); + tiq = PriorityQueue.usingComparator(numTerms, comparator); for (String fieldName : fields) { Terms terms = MultiTerms.getTerms(reader, fieldName); if (terms != null) { - tiq.fill(fieldName, terms.iterator()); + fill(tiq, fieldName, terms.iterator()); } } } @@ -167,26 +167,12 @@ public int compare(TermStats a, TermStats b) { } } - /** Priority queue for TermStats objects */ - static final class TermStatsQueue extends PriorityQueue { - final Comparator comparator; - - TermStatsQueue(int size, Comparator comparator) { - super(size); - this.comparator = comparator; - } - - @Override - protected boolean lessThan(TermStats termInfoA, TermStats termInfoB) { - return comparator.compare(termInfoA, termInfoB) < 0; - } - - protected void fill(String field, TermsEnum termsEnum) throws IOException { - BytesRef term = null; - while ((term = termsEnum.next()) != null) { - insertWithOverflow( - new TermStats(field, term, termsEnum.docFreq(), termsEnum.totalTermFreq())); - } + private static void fill(PriorityQueue queue, String field, TermsEnum termsEnum) + throws IOException { + BytesRef term = null; + while ((term = termsEnum.next()) != null) { + queue.insertWithOverflow( + new TermStats(field, term, termsEnum.docFreq(), termsEnum.totalTermFreq())); } } } diff --git a/lucene/queries/src/test/org/apache/lucene/queries/TestCommonTermsQuery.java b/lucene/queries/src/test/org/apache/lucene/queries/TestCommonTermsQuery.java index 9282fdb79461..abaf31d75b1a 100644 --- a/lucene/queries/src/test/org/apache/lucene/queries/TestCommonTermsQuery.java +++ b/lucene/queries/src/test/org/apache/lucene/queries/TestCommonTermsQuery.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Random; @@ -426,22 +427,10 @@ public void testRandomIndex() throws IOException { LeafReader wrapper = getOnlyLeafReader(reader); String field = "body"; Terms terms = wrapper.terms(field); + Comparator compareFreq = Comparator.comparingInt(tf -> tf.freq); PriorityQueue lowFreqQueue = - new PriorityQueue(5) { - - @Override - protected boolean lessThan(TermAndFreq a, TermAndFreq b) { - return a.freq > b.freq; - } - }; - PriorityQueue highFreqQueue = - new PriorityQueue(5) { - - @Override - protected boolean lessThan(TermAndFreq a, TermAndFreq b) { - return a.freq < b.freq; - } - }; + PriorityQueue.usingComparator(5, compareFreq.reversed()); + PriorityQueue highFreqQueue = PriorityQueue.usingComparator(5, compareFreq); try { TermsEnum iterator = terms.iterator(); while (iterator.next() != null) { diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/iterators/TopnOrdinalIterator.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/iterators/TopnOrdinalIterator.java index 684afec5f5de..66e19c34ef2c 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/iterators/TopnOrdinalIterator.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/iterators/TopnOrdinalIterator.java @@ -17,6 +17,7 @@ package org.apache.lucene.sandbox.facet.iterators; import java.io.IOException; +import java.util.Comparator; import org.apache.lucene.util.PriorityQueue; /** @@ -50,7 +51,8 @@ private void getTopN() throws IOException { // probably doesn't make sense for large enough taxonomy indexes? // e.g. TopOrdAndIntQueue q = new TopComparableQueue(Math.min(taxoReader.getSize(), topN)); // TODO: create queue lazily - skip if first nextOrd is NO_MORE_ORDS ? - TopComparableQueue queue = new TopComparableQueue<>(topN); + PriorityQueue> queue = + PriorityQueue.usingComparator(topN, Comparator.comparing(p -> p.comparable)); OrdComparablePair reuse = null; for (int ord = sourceOrds.nextOrd(); ord != NO_MORE_ORDS; ord = sourceOrds.nextOrd()) { if (reuse == null) { @@ -81,21 +83,6 @@ public int nextOrd() throws IOException { return result[currentIndex++]; } - /** Keeps top N results ordered by Comparable. */ - private static class TopComparableQueue> - extends PriorityQueue> { - - /** Sole constructor. */ - public TopComparableQueue(int topN) { - super(topN); - } - - @Override - protected boolean lessThan(OrdComparablePair a, OrdComparablePair b) { - return a.lessThan(b); - } - } - /** Pair of ordinal and comparable to use in TopComparableQueue */ private static class OrdComparablePair> { int ordinal; @@ -105,9 +92,5 @@ private OrdComparablePair(int ordinal, T comparable) { this.ordinal = ordinal; this.comparable = comparable; } - - boolean lessThan(OrdComparablePair other) { - return comparable.compareTo(other.comparable) < 0; - } } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/TermAutomatonScorer.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/TermAutomatonScorer.java index 7c6d6c9b6bbd..783dd7b180cc 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/TermAutomatonScorer.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/TermAutomatonScorer.java @@ -17,6 +17,7 @@ package org.apache.lucene.sandbox.search; import java.io.IOException; +import java.util.Comparator; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.sandbox.search.TermAutomatonQuery.EnumAndScorer; import org.apache.lucene.sandbox.search.TermAutomatonQuery.TermAutomatonWeight; @@ -73,8 +74,11 @@ public TermAutomatonScorer( this.runAutomaton = new TermRunAutomaton(weight.automaton, subs.length); this.scorer = scorer; this.norms = norms; - this.docIDQueue = new DocIDQueue(subs.length); - this.posQueue = new PositionQueue(subs.length); + this.docIDQueue = + PriorityQueue.usingComparator( + subs.length, Comparator.comparingInt(es -> es.posEnum.docID())); + this.posQueue = + PriorityQueue.usingComparator(subs.length, Comparator.comparingInt(es -> es.pos)); this.anyTermID = anyTermID; this.subsOnDoc = new EnumAndScorer[subs.length]; this.positions = new PosState[4]; @@ -94,30 +98,6 @@ public TermAutomatonScorer( this.cost = cost; } - /** Sorts by docID so we can quickly pull out all scorers that are on the same (lowest) docID. */ - private static class DocIDQueue extends PriorityQueue { - public DocIDQueue(int maxSize) { - super(maxSize); - } - - @Override - protected boolean lessThan(EnumAndScorer a, EnumAndScorer b) { - return a.posEnum.docID() < b.posEnum.docID(); - } - } - - /** Sorts by position so we can visit all scorers on one doc, by position. */ - private static class PositionQueue extends PriorityQueue { - public PositionQueue(int maxSize) { - super(maxSize); - } - - @Override - protected boolean lessThan(EnumAndScorer a, EnumAndScorer b) { - return a.pos < b.pos; - } - } - /** Pops all enums positioned on the current (minimum) doc */ private void popCurrentDoc() { assert numSubsOnDoc == 0;