Skip to content

Commit 42c7956

Browse files
jpountz and romseygeek committed
Merge DocAndFreqBuffer and DocAndScoreBuffer. (#14748)
I initially kept those separated, but I'm now coming to the conclusion that they should be merged:
- While frequencies are ints today, some queries produce float frequencies (`SloppyPhraseQuery`, `CombinedFieldQuery`).
- `TermScorer` no longer needs to keep track of a separate buffer for frequencies.

Co-authored-by: Alan Woodward <[email protected]>
1 parent 224c901 commit 42c7956

File tree

16 files changed

+59
-109
lines changed

16 files changed

+59
-109
lines changed

lucene/core/src/java/org/apache/lucene/codecs/lucene103/Lucene103PostingsReader.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
import org.apache.lucene.index.SegmentReadState;
4747
import org.apache.lucene.internal.vectorization.PostingDecodingUtil;
4848
import org.apache.lucene.internal.vectorization.VectorizationProvider;
49-
import org.apache.lucene.search.DocAndFreqBuffer;
49+
import org.apache.lucene.search.DocAndFloatFeatureBuffer;
5050
import org.apache.lucene.store.ByteArrayDataInput;
5151
import org.apache.lucene.store.ChecksumIndexInput;
5252
import org.apache.lucene.store.DataInput;
@@ -1043,7 +1043,7 @@ public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOExcept
10431043
}
10441044

10451045
@Override
1046-
public void nextPostings(int upTo, DocAndFreqBuffer buffer) throws IOException {
1046+
public void nextPostings(int upTo, DocAndFloatFeatureBuffer buffer) throws IOException {
10471047
assert needsRefilling == false;
10481048

10491049
if (needsFreq == false) {
@@ -1080,7 +1080,9 @@ public void nextPostings(int upTo, DocAndFreqBuffer buffer) throws IOException {
10801080
}
10811081

10821082
assert buffer.size > 0;
1083-
System.arraycopy(freqBuffer, start, buffer.freqs, 0, buffer.size);
1083+
for (int i = 0; i < buffer.size; ++i) {
1084+
buffer.features[i] = freqBuffer[start + i];
1085+
}
10841086

10851087
advance(upTo);
10861088
}

lucene/core/src/java/org/apache/lucene/index/CheckIndex.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
5959
import org.apache.lucene.index.PointValues.IntersectVisitor;
6060
import org.apache.lucene.index.PointValues.Relation;
61-
import org.apache.lucene.search.DocAndFreqBuffer;
61+
import org.apache.lucene.search.DocAndFloatFeatureBuffer;
6262
import org.apache.lucene.search.DocIdSetIterator;
6363
import org.apache.lucene.search.FieldExistsQuery;
6464
import org.apache.lucene.search.KnnCollector;
@@ -1614,7 +1614,7 @@ private static Status.TermIndexStatus checkFields(
16141614
postings = termsEnum.postings(postings, PostingsEnum.ALL);
16151615
bulkPostings = termsEnum.postings(bulkPostings, PostingsEnum.ALL);
16161616
bulkPostings.nextDoc();
1617-
DocAndFreqBuffer buffer = new DocAndFreqBuffer();
1617+
DocAndFloatFeatureBuffer buffer = new DocAndFloatFeatureBuffer();
16181618
int bufferIndex = 0;
16191619

16201620
if (hasFreqs == false) {
@@ -1680,10 +1680,10 @@ private static Status.TermIndexStatus checkFields(
16801680
+ " as next doc while PostingsEnum#nextDoc returns "
16811681
+ doc);
16821682
}
1683-
if (freq != buffer.freqs[bufferIndex]) {
1683+
if (freq != buffer.features[bufferIndex]) {
16841684
throw new CheckIndexException(
16851685
"PostingsEnum#nextPostings returns "
1686-
+ buffer.freqs[bufferIndex]
1686+
+ buffer.features[bufferIndex]
16871687
+ " as term freq while PostingsEnum#freq returns "
16881688
+ freq);
16891689
}

lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
package org.apache.lucene.index;
1818

1919
import java.io.IOException;
20-
import org.apache.lucene.search.DocAndFreqBuffer;
20+
import org.apache.lucene.search.DocAndFloatFeatureBuffer;
2121
import org.apache.lucene.search.DocIdSetIterator;
2222
import org.apache.lucene.util.BytesRef;
2323

@@ -126,18 +126,18 @@ protected PostingsEnum() {}
126126
* buffer.size = size;
127127
* </pre>
128128
*
129-
* <p><b>NOTE</b>: The provided {@link DocAndFreqBuffer} should not hold references to internal
130-
* data structures.
129+
* <p><b>NOTE</b>: The provided {@link DocAndFloatFeatureBuffer} should not hold references to
130+
* internal data structures.
131131
*
132132
* @lucene.internal
133133
*/
134-
public void nextPostings(int upTo, DocAndFreqBuffer buffer) throws IOException {
134+
public void nextPostings(int upTo, DocAndFloatFeatureBuffer buffer) throws IOException {
135135
int batchSize = 16; // arbitrary
136136
buffer.growNoCopy(batchSize);
137137
int size = 0;
138138
for (int doc = docID(); doc < upTo && size < batchSize; doc = nextDoc()) {
139139
buffer.docs[size] = doc;
140-
buffer.freqs[size] = freq();
140+
buffer.features[size] = freq();
141141
++size;
142142
}
143143
buffer.size = size;

lucene/core/src/java/org/apache/lucene/search/BatchScoreBulkScorer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
class BatchScoreBulkScorer extends BulkScorer {
2727

2828
private final SimpleScorable scorable = new SimpleScorable();
29-
private final DocAndScoreBuffer buffer = new DocAndScoreBuffer();
29+
private final DocAndFloatFeatureBuffer buffer = new DocAndFloatFeatureBuffer();
3030
private final Scorer scorer;
3131

3232
BatchScoreBulkScorer(Scorer scorer) {
@@ -50,7 +50,7 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr
5050
buffer.size > 0;
5151
scorer.nextDocsAndScores(max, acceptDocs, buffer)) {
5252
for (int i = 0, size = buffer.size; i < size; i++) {
53-
float score = scorable.score = buffer.scores[i];
53+
float score = scorable.score = buffer.features[i];
5454
if (score >= scorable.minCompetitiveScore) {
5555
collector.collect(buffer.docs[i]);
5656
}

lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ final class BlockMaxConjunctionBulkScorer extends BulkScorer {
4141
private final DocAndScore scorable = new DocAndScore();
4242
private final double[] sumOfOtherClauses;
4343
private final int maxDoc;
44-
private final DocAndScoreBuffer docAndScoreBuffer = new DocAndScoreBuffer();
44+
private final DocAndFloatFeatureBuffer docAndScoreBuffer = new DocAndFloatFeatureBuffer();
4545
private final DocAndScoreAccBuffer docAndScoreAccBuffer = new DocAndScoreAccBuffer();
4646

4747
BlockMaxConjunctionBulkScorer(int maxDoc, List<Scorer> scorers) throws IOException {

lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ public DisiWrapper get(int i) {
8181
final int minShouldMatch;
8282
final long cost;
8383
final boolean needsScores;
84-
private final DocAndScoreBuffer docAndScoreBuffer = new DocAndScoreBuffer();
84+
private final DocAndFloatFeatureBuffer docAndScoreBuffer = new DocAndFloatFeatureBuffer();
8585

8686
BooleanScorer(Collection<Scorer> scorers, int minShouldMatch, boolean needsScores) {
8787
if (minShouldMatch < 1 || minShouldMatch > scorers.size()) {
@@ -148,7 +148,7 @@ private void scoreWindowIntoBitSetAndReplay(
148148
w.scorer.nextDocsAndScores(max, acceptDocs, docAndScoreBuffer)) {
149149
for (int index = 0; index < docAndScoreBuffer.size; ++index) {
150150
final int doc = docAndScoreBuffer.docs[index];
151-
final float score = docAndScoreBuffer.scores[index];
151+
final float score = docAndScoreBuffer.features[index];
152152
final int d = doc & MASK;
153153
matching.set(d);
154154
final Bucket bucket = buckets[d];

lucene/core/src/java/org/apache/lucene/search/DocAndFreqBuffer.java renamed to lucene/core/src/java/org/apache/lucene/search/DocAndFloatFeatureBuffer.java

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,29 +21,32 @@
2121
import org.apache.lucene.util.IntsRef;
2222

2323
/**
24-
* Wrapper around parallel arrays storing doc IDs and their corresponding frequencies.
24+
* Wrapper around parallel arrays storing doc IDs and their corresponding features, stored as Java
25+
* floats. These features may be anything, but are typically a term frequency or a score.
2526
*
2627
* @lucene.internal
2728
*/
28-
public final class DocAndFreqBuffer {
29+
public final class DocAndFloatFeatureBuffer {
30+
31+
private static final float[] EMPTY_FLOATS = new float[0];
2932

3033
/** Doc IDs */
3134
public int[] docs = IntsRef.EMPTY_INTS;
3235

33-
/** Frequencies */
34-
public int[] freqs = IntsRef.EMPTY_INTS;
36+
/** Float-valued features */
37+
public float[] features = EMPTY_FLOATS;
3538

36-
/** Number of valid entries in the doc ID and frequency arrays. */
39+
/** Number of valid entries in the doc ID and float-valued feature arrays. */
3740
public int size;
3841

3942
/** Sole constructor. */
40-
public DocAndFreqBuffer() {}
43+
public DocAndFloatFeatureBuffer() {}
4144

4245
/** Grow both arrays to ensure that they can store at least the given number of entries. */
4346
public void growNoCopy(int minSize) {
4447
if (docs.length < minSize) {
4548
docs = ArrayUtil.growNoCopy(docs, minSize);
46-
freqs = new int[docs.length];
49+
features = new float[docs.length];
4750
}
4851
}
4952

@@ -53,7 +56,7 @@ public void apply(Bits liveDocs) {
5356
for (int i = 0; i < size; ++i) {
5457
if (liveDocs.get(docs[i])) {
5558
docs[newSize] = docs[i];
56-
freqs[newSize] = freqs[i];
59+
features[newSize] = features[i];
5760
newSize++;
5861
}
5962
}

lucene/core/src/java/org/apache/lucene/search/DocAndScoreAccBuffer.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,15 @@ public void grow(int minSize) {
6262
}
6363
}
6464

65-
/** Copy content from the given {@link DocAndScoreBuffer}, expanding float scores to doubles. */
66-
public void copyFrom(DocAndScoreBuffer buffer) {
65+
/**
66+
* Copy content from the given {@link DocAndFloatFeatureBuffer}, expanding float scores to
67+
* doubles.
68+
*/
69+
public void copyFrom(DocAndFloatFeatureBuffer buffer) {
6770
growNoCopy(buffer.size);
6871
System.arraycopy(buffer.docs, 0, docs, 0, buffer.size);
6972
for (int i = 0; i < buffer.size; ++i) {
70-
scores[i] = buffer.scores[i];
73+
scores[i] = buffer.features[i];
7174
}
7275
this.size = buffer.size;
7376
}

lucene/core/src/java/org/apache/lucene/search/DocAndScoreBuffer.java

Lines changed: 0 additions & 50 deletions
This file was deleted.

lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
5151
private final FixedBitSet windowMatches = new FixedBitSet(INNER_WINDOW_SIZE);
5252
private final double[] windowScores = new double[INNER_WINDOW_SIZE];
5353

54-
private final DocAndScoreBuffer docAndScoreBuffer = new DocAndScoreBuffer();
54+
private final DocAndFloatFeatureBuffer docAndScoreBuffer = new DocAndFloatFeatureBuffer();
5555
private final DocAndScoreAccBuffer docAndScoreAccBuffer = new DocAndScoreAccBuffer();
5656

5757
MaxScoreBulkScorer(int maxDoc, List<Scorer> scorers, Scorer filter) throws IOException {
@@ -287,7 +287,7 @@ private void scoreInnerWindowMultipleEssentialClauses(
287287
top.scorer.nextDocsAndScores(innerWindowMax, acceptDocs, docAndScoreBuffer)) {
288288
for (int index = 0; index < docAndScoreBuffer.size; ++index) {
289289
final int doc = docAndScoreBuffer.docs[index];
290-
final float score = docAndScoreBuffer.scores[index];
290+
final float score = docAndScoreBuffer.features[index];
291291
final int i = doc - innerWindowMin;
292292
windowMatches.set(i);
293293
windowScores[i] += score;

0 commit comments

Comments
 (0)