Skip to content

Commit a17d2eb

Browse files
authored
LUCENE-10236: Update field-weight used in CombinedFieldQuery scoring calculation (9.1.0 Backporting) (#588)
1 parent 6370892 commit a17d2eb

File tree

4 files changed: +90 −1 lines

lucene/CHANGES.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,9 @@ Bug Fixes
168168
* LUCENE-10352: Fixed ctor argument checks: JapaneseKatakanaStemFilter,
169169
DoubleMetaphoneFilter (Uwe Schindler, Robert Muir)
170170

171+
* LUCENE-10236: Stop duplicating norms when scoring in CombinedFieldQuery.
172+
(Zach Chen, Jim Ferenczi, Julie Tibshirani)
173+
171174
* LUCENE-10353: Add random null injection to TestRandomChains. (Robert Muir,
172175
Uwe Schindler)
173176

lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException {
418418
}
419419

420420
MultiNormsLeafSimScorer scoringSimScorer =
421-
new MultiNormsLeafSimScorer(simWeight, context.reader(), fields, true);
421+
new MultiNormsLeafSimScorer(simWeight, context.reader(), fieldAndWeights.values(), true);
422422
LeafSimScorer nonScoringSimScorer =
423423
new LeafSimScorer(simWeight, context.reader(), "pseudo_field", false);
424424
// we use termscorers + disjunction as an impl detail

lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiNormsLeafSimScorer.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121
import java.io.IOException;
2222
import java.util.ArrayList;
2323
import java.util.Collection;
24+
import java.util.HashSet;
2425
import java.util.List;
2526
import java.util.Objects;
27+
import java.util.Set;
2628
import org.apache.lucene.index.LeafReader;
2729
import org.apache.lucene.index.NumericDocValues;
2830
import org.apache.lucene.search.Explanation;
@@ -61,7 +63,13 @@ final class MultiNormsLeafSimScorer {
6163
if (needsScores) {
6264
final List<NumericDocValues> normsList = new ArrayList<>();
6365
final List<Float> weightList = new ArrayList<>();
66+
final Set<String> duplicateCheckingSet = new HashSet<>();
6467
for (FieldAndWeight field : normFields) {
68+
assert duplicateCheckingSet.add(field.field)
69+
: "There is a duplicated field ["
70+
+ field.field
71+
+ "] used to construct MultiNormsLeafSimScorer";
72+
6573
NumericDocValues norms = reader.getNormValues(field.field);
6674
if (norms != null) {
6775
normsList.add(norms);

lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestCombinedFieldQuery.java

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616
*/
1717
package org.apache.lucene.sandbox.search;
1818

19+
import static com.carrotsearch.randomizedtesting.RandomizedTest.atMost;
20+
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean;
21+
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
22+
1923
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
2024
import java.io.IOException;
2125
import java.util.Arrays;
@@ -165,6 +169,80 @@ public void testSameScore() throws IOException {
165169
dir.close();
166170
}
167171

172+
/**
 * Regression test for LUCENE-10236: searching a {@code CombinedFieldQuery} over documents where
 * several fields and several terms match at once.
 *
 * <p>Indexes 100–500 random documents with overlapping terms ("foo", "zoo") across fields "a" and
 * "b" (plus noise terms and an unrelated field "c"), then runs a two-field, two-term
 * CombinedFieldQuery with random per-field boosts. The test makes no assertions on the resulting
 * scores — it only verifies that the search completes, i.e. that scoring does not trip the
 * duplicate-field assertion added to MultiNormsLeafSimScorer in this change.
 */
public void testScoringWithMultipleFieldTermsMatch() throws IOException {
  int numMatchDoc = randomIntBetween(100, 500);
  int numHits = atMost(100);
  // random().nextInt(5) can return 0; Math.max keeps each boost >= 1 as the Builder requires
  int boost1 = Math.max(1, random().nextInt(5));
  int boost2 = Math.max(1, random().nextInt(5));

  Directory dir = newDirectory();
  Similarity similarity = randomCompatibleSimilarity();

  IndexWriterConfig iwc = new IndexWriterConfig();
  iwc.setSimilarity(similarity);
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);

  // adding potentially matching doc
  for (int i = 0; i < numMatchDoc; i++) {
    Document doc = new Document();

    // field "a": queried term "foo", repeated freqA times to vary term frequency
    int freqA = random().nextInt(20) + 1;
    for (int j = 0; j < freqA; j++) {
      doc.add(new TextField("a", "foo", Store.NO));
    }

    // field "a": optional non-matching noise terms ("foo0", "foo1", ...).
    // NOTE: the nextInt draw happens unconditionally, before the coin flip —
    // keep it that way so the random sequence stays reproducible per seed.
    freqA = random().nextInt(20) + 1;
    if (randomBoolean()) {
      for (int j = 0; j < freqA; j++) {
        doc.add(new TextField("a", "foo" + j, Store.NO));
      }
    }

    // field "a": the second queried term "zoo"
    freqA = random().nextInt(20) + 1;
    for (int j = 0; j < freqA; j++) {
      doc.add(new TextField("a", "zoo", Store.NO));
    }

    // field "b": also contains "zoo", so both query fields match this term
    int freqB = random().nextInt(20) + 1;
    for (int j = 0; j < freqB; j++) {
      doc.add(new TextField("b", "zoo", Store.NO));
    }

    // field "b": optional noise terms, same unconditional-draw pattern as above
    freqB = random().nextInt(20) + 1;
    if (randomBoolean()) {
      for (int j = 0; j < freqB; j++) {
        doc.add(new TextField("b", "zoo" + j, Store.NO));
      }
    }

    // field "c": never queried; present so the index has terms outside the query fields
    int freqC = random().nextInt(20) + 1;
    for (int j = 0; j < freqC; j++) {
      doc.add(new TextField("c", "bla" + j, Store.NO));
    }
    w.addDocument(doc);
  }

  IndexReader reader = w.getReader();
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(similarity);

  // two fields x two terms, with the randomized boosts chosen above
  CombinedFieldQuery query =
      new CombinedFieldQuery.Builder()
          .addField("a", (float) boost1)
          .addField("b", (float) boost2)
          .addTerm(new BytesRef("foo"))
          .addTerm(new BytesRef("zoo"))
          .build();

  // collect over the whole index (totalHitsThreshold = MAX_VALUE); success == no exception
  TopScoreDocCollector completeCollector =
      TopScoreDocCollector.create(numHits, null, Integer.MAX_VALUE);
  searcher.search(query, completeCollector);

  reader.close();
  w.close();
  dir.close();
}
245+
168246
public void testNormsDisabled() throws IOException {
169247
Directory dir = newDirectory();
170248
Similarity similarity = randomCompatibleSimilarity();

0 commit comments

Comments
 (0)