Skip to content

Commit 26b1188

Browse files
authored
highlighter, fastvector: improve/fix merging of boosts to improve snippet ranking (#15434)
Fix the boost override in QueryPhraseMap.markTerminal() caused by conflicting query expansion. When a query includes overlapping phrases, the expansion process may generate a duplicate of an existing phrase: one copy carries the original (possibly high) user-defined boost, and the other carries a boost of 1. Because markTerminal overwrote the boost unconditionally, the final boost assigned to the QueryPhraseMap depended on whichever duplicate was processed last while the QueryPhraseMap was being built, and could therefore be incorrect. markTerminal now keeps the maximum of the existing boost and the incoming boost.
1 parent f996e74 commit 26b1188

File tree

3 files changed

+46
-1
lines changed

3 files changed

+46
-1
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ Bug Fixes
112112

113113
* GITHUB#15125: Handle inconsistent schema on flush with index sorts (Nhat Nguyen)
114114

115+
* GITHUB#15434: Take the max boost in markTerminal so that conflicting expanded phrases no longer override the original boost in FVHighlighter (Luana Fragoso)
116+
115117
Changes in Runtime Behavior
116118
---------------------
117119
* GITHUB#14187: The query cache is now disabled by default. (Adrien Grand)

lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,7 @@ private void markTerminal(float boost) {
429429
private void markTerminal(int slop, float boost) {
430430
this.terminal = true;
431431
this.slop = slop;
432-
this.boost = boost;
432+
this.boost = Math.max(this.boost, boost);
433433
this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber();
434434
}
435435

lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/TestFieldQuery.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import java.io.IOException;
2020
import java.util.ArrayList;
2121
import java.util.HashSet;
22+
import java.util.LinkedHashSet;
2223
import java.util.List;
2324
import java.util.Map;
2425
import java.util.Set;
@@ -814,6 +815,48 @@ public void testQueryPhraseMapOverlap2gram() throws Exception {
814815
assertEquals(0, qpm2.subMap.size());
815816
}
816817

818+
public void testQueryPhraseMapDuplicate() throws IOException {
819+
BooleanQuery.Builder query = new BooleanQuery.Builder();
820+
Query bq = toPhraseQuery(analyze("a b c", F, analyzerB), F);
821+
bq = new BoostQuery(bq, 100);
822+
query.add(bq, Occur.SHOULD);
823+
824+
bq = toPhraseQuery(analyze("a b", F, analyzerB), F);
825+
bq = new BoostQuery(bq, 20);
826+
query.add(bq, Occur.SHOULD);
827+
828+
bq = toPhraseQuery(analyze("b c", F, analyzerB), F);
829+
bq = new BoostQuery(bq, 50);
830+
query.add(bq, Occur.SHOULD);
831+
832+
bq = query.build();
833+
FieldQuery fq = new FieldQuery(bq, true, true);
834+
Set<Query> flatQueries = new LinkedHashSet<>();
835+
fq.flatten(bq, searcher, flatQueries, 1f);
836+
837+
assertCollectionQueries(
838+
fq.expand(flatQueries),
839+
pqF(100, "a", "b", "c"),
840+
pqF(20, "a", "b"),
841+
// "a b c": 1 -> expanded from "a b" + "b c"
842+
new BoostQuery(pqF(1f, "a", "b", "c"), 1f),
843+
pqF(50, "b", "c"));
844+
845+
Map<String, QueryPhraseMap> map = fq.rootMaps;
846+
QueryPhraseMap a_qpm = map.get("f").subMap.get("a");
847+
assertEquals(0, a_qpm.boost, 0.0);
848+
QueryPhraseMap b_qpm = a_qpm.subMap.get("b");
849+
assertEquals(20, b_qpm.boost, 0.0);
850+
QueryPhraseMap c_qpm = b_qpm.subMap.get("c");
851+
// make sure the final boost is the user-defined one (100), not the boost of 1 from the expanded duplicate
852+
assertEquals(100, c_qpm.boost, 0.0);
853+
854+
b_qpm = map.get("f").subMap.get("b");
855+
assertEquals(0, b_qpm.boost, 0.0);
856+
c_qpm = b_qpm.subMap.get("c");
857+
assertEquals(50, c_qpm.boost, 0.0);
858+
}
859+
817860
public void testSearchPhrase() throws Exception {
818861
Query query = pqF("a", "b", "c");
819862

0 commit comments

Comments
 (0)