Skip to content

Commit 3b6408a

Browse files
jpountz and romseygeek
authored
Merge DocAndFreqBuffer and DocAndScoreBuffer. (#14748)
I initially kept those separated, but I'm now coming to the conclusion that they should be merged:
- While frequencies are ints today, some queries produce float frequencies (`SloppyPhraseQuery`, `CombinedFieldQuery`).
- `TermScorer` no longer needs to keep track of a separate buffer for frequencies.

Co-authored-by: Alan Woodward <[email protected]>
1 parent adb44bb commit 3b6408a

File tree

16 files changed

+59
-109
lines changed

16 files changed

+59
-109
lines changed

lucene/core/src/java/org/apache/lucene/codecs/lucene103/Lucene103PostingsReader.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
import org.apache.lucene.index.SegmentReadState;
4747
import org.apache.lucene.internal.vectorization.PostingDecodingUtil;
4848
import org.apache.lucene.internal.vectorization.VectorizationProvider;
49-
import org.apache.lucene.search.DocAndFreqBuffer;
49+
import org.apache.lucene.search.DocAndFloatFeatureBuffer;
5050
import org.apache.lucene.store.ByteArrayDataInput;
5151
import org.apache.lucene.store.ChecksumIndexInput;
5252
import org.apache.lucene.store.DataInput;
@@ -1036,7 +1036,7 @@ public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOExcept
10361036
}
10371037

10381038
@Override
1039-
public void nextPostings(int upTo, DocAndFreqBuffer buffer) throws IOException {
1039+
public void nextPostings(int upTo, DocAndFloatFeatureBuffer buffer) throws IOException {
10401040
assert needsRefilling == false;
10411041

10421042
if (needsFreq == false) {
@@ -1073,7 +1073,9 @@ public void nextPostings(int upTo, DocAndFreqBuffer buffer) throws IOException {
10731073
}
10741074

10751075
assert buffer.size > 0;
1076-
System.arraycopy(freqBuffer, start, buffer.freqs, 0, buffer.size);
1076+
for (int i = 0; i < buffer.size; ++i) {
1077+
buffer.features[i] = freqBuffer[start + i];
1078+
}
10771079

10781080
advance(upTo);
10791081
}

lucene/core/src/java/org/apache/lucene/index/CheckIndex.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
import org.apache.lucene.index.PointValues.IntersectVisitor;
6464
import org.apache.lucene.index.PointValues.Relation;
6565
import org.apache.lucene.internal.hppc.IntIntHashMap;
66-
import org.apache.lucene.search.DocAndFreqBuffer;
66+
import org.apache.lucene.search.DocAndFloatFeatureBuffer;
6767
import org.apache.lucene.search.DocIdSetIterator;
6868
import org.apache.lucene.search.FieldExistsQuery;
6969
import org.apache.lucene.search.KnnCollector;
@@ -1653,7 +1653,7 @@ private static Status.TermIndexStatus checkFields(
16531653
postings = termsEnum.postings(postings, PostingsEnum.ALL);
16541654
bulkPostings = termsEnum.postings(bulkPostings, PostingsEnum.ALL);
16551655
bulkPostings.nextDoc();
1656-
DocAndFreqBuffer buffer = new DocAndFreqBuffer();
1656+
DocAndFloatFeatureBuffer buffer = new DocAndFloatFeatureBuffer();
16571657
int bufferIndex = 0;
16581658

16591659
if (hasFreqs == false) {
@@ -1719,10 +1719,10 @@ private static Status.TermIndexStatus checkFields(
17191719
+ " as next doc while PostingsEnum#nextDoc returns "
17201720
+ doc);
17211721
}
1722-
if (freq != buffer.freqs[bufferIndex]) {
1722+
if (freq != buffer.features[bufferIndex]) {
17231723
throw new CheckIndexException(
17241724
"PostingsEnum#nextPostings returns "
1725-
+ buffer.freqs[bufferIndex]
1725+
+ buffer.features[bufferIndex]
17261726
+ " as term freq while PostingsEnum#freq returns "
17271727
+ freq);
17281728
}

lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
package org.apache.lucene.index;
1818

1919
import java.io.IOException;
20-
import org.apache.lucene.search.DocAndFreqBuffer;
20+
import org.apache.lucene.search.DocAndFloatFeatureBuffer;
2121
import org.apache.lucene.search.DocIdSetIterator;
2222
import org.apache.lucene.util.BytesRef;
2323

@@ -126,18 +126,18 @@ protected PostingsEnum() {}
126126
* buffer.size = size;
127127
* </pre>
128128
*
129-
* <p><b>NOTE</b>: The provided {@link DocAndFreqBuffer} should not hold references to internal
130-
* data structures.
129+
* <p><b>NOTE</b>: The provided {@link DocAndFloatFeatureBuffer} should not hold references to
130+
* internal data structures.
131131
*
132132
* @lucene.internal
133133
*/
134-
public void nextPostings(int upTo, DocAndFreqBuffer buffer) throws IOException {
134+
public void nextPostings(int upTo, DocAndFloatFeatureBuffer buffer) throws IOException {
135135
int batchSize = 16; // arbitrary
136136
buffer.growNoCopy(batchSize);
137137
int size = 0;
138138
for (int doc = docID(); doc < upTo && size < batchSize; doc = nextDoc()) {
139139
buffer.docs[size] = doc;
140-
buffer.freqs[size] = freq();
140+
buffer.features[size] = freq();
141141
++size;
142142
}
143143
buffer.size = size;

lucene/core/src/java/org/apache/lucene/search/BatchScoreBulkScorer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
class BatchScoreBulkScorer extends BulkScorer {
2727

2828
private final SimpleScorable scorable = new SimpleScorable();
29-
private final DocAndScoreBuffer buffer = new DocAndScoreBuffer();
29+
private final DocAndFloatFeatureBuffer buffer = new DocAndFloatFeatureBuffer();
3030
private final Scorer scorer;
3131

3232
BatchScoreBulkScorer(Scorer scorer) {
@@ -50,7 +50,7 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr
5050
buffer.size > 0;
5151
scorer.nextDocsAndScores(max, acceptDocs, buffer)) {
5252
for (int i = 0, size = buffer.size; i < size; i++) {
53-
float score = scorable.score = buffer.scores[i];
53+
float score = scorable.score = buffer.features[i];
5454
if (score >= scorable.minCompetitiveScore) {
5555
collector.collect(buffer.docs[i]);
5656
}

lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ final class BlockMaxConjunctionBulkScorer extends BulkScorer {
4141
private final DocAndScore scorable = new DocAndScore();
4242
private final double[] sumOfOtherClauses;
4343
private final int maxDoc;
44-
private final DocAndScoreBuffer docAndScoreBuffer = new DocAndScoreBuffer();
44+
private final DocAndFloatFeatureBuffer docAndScoreBuffer = new DocAndFloatFeatureBuffer();
4545
private final DocAndScoreAccBuffer docAndScoreAccBuffer = new DocAndScoreAccBuffer();
4646

4747
BlockMaxConjunctionBulkScorer(int maxDoc, List<Scorer> scorers) throws IOException {

lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ public DisiWrapper get(int i) {
8181
final int minShouldMatch;
8282
final long cost;
8383
final boolean needsScores;
84-
private final DocAndScoreBuffer docAndScoreBuffer = new DocAndScoreBuffer();
84+
private final DocAndFloatFeatureBuffer docAndScoreBuffer = new DocAndFloatFeatureBuffer();
8585

8686
BooleanScorer(Collection<Scorer> scorers, int minShouldMatch, boolean needsScores) {
8787
if (minShouldMatch < 1 || minShouldMatch > scorers.size()) {
@@ -148,7 +148,7 @@ private void scoreWindowIntoBitSetAndReplay(
148148
w.scorer.nextDocsAndScores(max, acceptDocs, docAndScoreBuffer)) {
149149
for (int index = 0; index < docAndScoreBuffer.size; ++index) {
150150
final int doc = docAndScoreBuffer.docs[index];
151-
final float score = docAndScoreBuffer.scores[index];
151+
final float score = docAndScoreBuffer.features[index];
152152
final int d = doc & MASK;
153153
matching.set(d);
154154
final Bucket bucket = buckets[d];

lucene/core/src/java/org/apache/lucene/search/DocAndFreqBuffer.java renamed to lucene/core/src/java/org/apache/lucene/search/DocAndFloatFeatureBuffer.java

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,29 +21,32 @@
2121
import org.apache.lucene.util.IntsRef;
2222

2323
/**
24-
* Wrapper around parallel arrays storing doc IDs and their corresponding frequencies.
24+
* Wrapper around parallel arrays storing doc IDs and their corresponding features, stored as Java
25+
* floats. These features may be anything, but are typically a term frequency or a score.
2526
*
2627
* @lucene.internal
2728
*/
28-
public final class DocAndFreqBuffer {
29+
public final class DocAndFloatFeatureBuffer {
30+
31+
private static final float[] EMPTY_FLOATS = new float[0];
2932

3033
/** Doc IDs */
3134
public int[] docs = IntsRef.EMPTY_INTS;
3235

33-
/** Frequencies */
34-
public int[] freqs = IntsRef.EMPTY_INTS;
36+
/** Float-valued features */
37+
public float[] features = EMPTY_FLOATS;
3538

36-
/** Number of valid entries in the doc ID and frequency arrays. */
39+
/** Number of valid entries in the doc ID and float-valued feature arrays. */
3740
public int size;
3841

3942
/** Sole constructor. */
40-
public DocAndFreqBuffer() {}
43+
public DocAndFloatFeatureBuffer() {}
4144

4245
/** Grow both arrays to ensure that they can store at least the given number of entries. */
4346
public void growNoCopy(int minSize) {
4447
if (docs.length < minSize) {
4548
docs = ArrayUtil.growNoCopy(docs, minSize);
46-
freqs = new int[docs.length];
49+
features = new float[docs.length];
4750
}
4851
}
4952

@@ -53,7 +56,7 @@ public void apply(Bits liveDocs) {
5356
for (int i = 0; i < size; ++i) {
5457
if (liveDocs.get(docs[i])) {
5558
docs[newSize] = docs[i];
56-
freqs[newSize] = freqs[i];
59+
features[newSize] = features[i];
5760
newSize++;
5861
}
5962
}

lucene/core/src/java/org/apache/lucene/search/DocAndScoreAccBuffer.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,15 @@ public void grow(int minSize) {
6262
}
6363
}
6464

65-
/** Copy content from the given {@link DocAndScoreBuffer}, expanding float scores to doubles. */
66-
public void copyFrom(DocAndScoreBuffer buffer) {
65+
/**
66+
* Copy content from the given {@link DocAndFloatFeatureBuffer}, expanding float scores to
67+
* doubles.
68+
*/
69+
public void copyFrom(DocAndFloatFeatureBuffer buffer) {
6770
growNoCopy(buffer.size);
6871
System.arraycopy(buffer.docs, 0, docs, 0, buffer.size);
6972
for (int i = 0; i < buffer.size; ++i) {
70-
scores[i] = buffer.scores[i];
73+
scores[i] = buffer.features[i];
7174
}
7275
this.size = buffer.size;
7376
}

lucene/core/src/java/org/apache/lucene/search/DocAndScoreBuffer.java

Lines changed: 0 additions & 50 deletions
This file was deleted.

lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
5151
private final FixedBitSet windowMatches = new FixedBitSet(INNER_WINDOW_SIZE);
5252
private final double[] windowScores = new double[INNER_WINDOW_SIZE];
5353

54-
private final DocAndScoreBuffer docAndScoreBuffer = new DocAndScoreBuffer();
54+
private final DocAndFloatFeatureBuffer docAndScoreBuffer = new DocAndFloatFeatureBuffer();
5555
private final DocAndScoreAccBuffer docAndScoreAccBuffer = new DocAndScoreAccBuffer();
5656

5757
MaxScoreBulkScorer(int maxDoc, List<Scorer> scorers, Scorer filter) throws IOException {
@@ -287,7 +287,7 @@ private void scoreInnerWindowMultipleEssentialClauses(
287287
top.scorer.nextDocsAndScores(innerWindowMax, acceptDocs, docAndScoreBuffer)) {
288288
for (int index = 0; index < docAndScoreBuffer.size; ++index) {
289289
final int doc = docAndScoreBuffer.docs[index];
290-
final float score = docAndScoreBuffer.scores[index];
290+
final float score = docAndScoreBuffer.features[index];
291291
final int i = doc - innerWindowMin;
292292
windowMatches.set(i);
293293
windowScores[i] += score;

0 commit comments

Comments
 (0)