Skip to content

Commit d05d6fe

Browse files
authored
Provide better impacts for fields indexed with IndexOptions.DOCS (#14511)
Postings always return impacts with freq=Integer.MAX_VALUE and norm=1 when frequencies are not indexed (IndexOptions.DOCS). This significantly overestimates the score upper bound of term queries, since the similarity scorer is effectively called with freq=1 all the time in this case (and either norm=1 if norms are not indexed, or the number of terms in the field otherwise). This updates postings to always return impacts with freq=1 and norm=1 when frequencies are not indexed, which helps compute better score upper bounds and, in turn, makes dynamic pruning perform better. Closes #14445
1 parent d72021a commit d05d6fe

File tree

3 files changed

+26
-14
lines changed

3 files changed

+26
-14
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ Bug Fixes
114114
* GITHUB#14523, GITHUB#14530: Correct TermOrdValComparator competitive iterator so that it forces sparse
115115
field iteration to be at least scoring window baseline when doing intoBitSet. (Ben Trent, Adrien Grand)
116116

117+
* GITHUB#14445, GITHUB#14511: Provide better impacts for fields indexed with IndexOptions.DOCS. (Aniketh Jain)
118+
117119
* GITHUB#14543: Fixed lead cost computations for bulk scorers of conjunctive
118120
queries that mix MUST and FILTER clauses, and disjunctive queries that
119121
configure a minimum number of matching SHOULD clauses.

lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene101/Lucene101PostingsReader.java

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,10 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
7070
private static final List<Impact> DUMMY_IMPACTS =
7171
Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
7272

73+
// Impacts returned when frequencies are not indexed (IndexOptions.DOCS); we stopped storing a placeholder impact with freq=1 for such fields after 9.12.0
74+
private static final List<Impact> DUMMY_IMPACTS_NO_FREQS =
75+
Collections.singletonList(new Impact(1, 1L));
76+
7377
private final IndexInput docIn;
7478
private final IndexInput posIn;
7579
private final IndexInput payIn;
@@ -1325,13 +1329,14 @@ public int getDocIdUpTo(int level) {
13251329

13261330
@Override
13271331
public List<Impact> getImpacts(int level) {
1328-
if (indexHasFreq) {
1329-
if (level == 0 && level0LastDocID != NO_MORE_DOCS) {
1330-
return readImpacts(level0SerializedImpacts, level0Impacts);
1331-
}
1332-
if (level == 1) {
1333-
return readImpacts(level1SerializedImpacts, level1Impacts);
1334-
}
1332+
if (indexHasFreq == false) {
1333+
return DUMMY_IMPACTS_NO_FREQS;
1334+
}
1335+
if (level == 0 && level0LastDocID != NO_MORE_DOCS) {
1336+
return readImpacts(level0SerializedImpacts, level0Impacts);
1337+
}
1338+
if (level == 1) {
1339+
return readImpacts(level1SerializedImpacts, level1Impacts);
13351340
}
13361341
return DUMMY_IMPACTS;
13371342
}

lucene/core/src/java/org/apache/lucene/codecs/lucene103/Lucene103PostingsReader.java

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ public final class Lucene103PostingsReader extends PostingsReaderBase {
7373
private static final List<Impact> DUMMY_IMPACTS =
7474
Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
7575

76+
// Impacts returned when frequencies are not indexed (IndexOptions.DOCS); we stopped storing a placeholder impact with freq=1 for such fields after 9.12.0
77+
private static final List<Impact> DUMMY_IMPACTS_NO_FREQS =
78+
Collections.singletonList(new Impact(1, 1L));
79+
7680
private final IndexInput docIn;
7781
private final IndexInput posIn;
7882
private final IndexInput payIn;
@@ -1302,13 +1306,14 @@ public int getDocIdUpTo(int level) {
13021306

13031307
@Override
13041308
public List<Impact> getImpacts(int level) {
1305-
if (indexHasFreq) {
1306-
if (level == 0 && level0LastDocID != NO_MORE_DOCS) {
1307-
return readImpacts(level0SerializedImpacts, level0Impacts);
1308-
}
1309-
if (level == 1) {
1310-
return readImpacts(level1SerializedImpacts, level1Impacts);
1311-
}
1309+
if (indexHasFreq == false) {
1310+
return DUMMY_IMPACTS_NO_FREQS;
1311+
}
1312+
if (level == 0 && level0LastDocID != NO_MORE_DOCS) {
1313+
return readImpacts(level0SerializedImpacts, level0Impacts);
1314+
}
1315+
if (level == 1) {
1316+
return readImpacts(level1SerializedImpacts, level1Impacts);
13121317
}
13131318
return DUMMY_IMPACTS;
13141319
}

0 commit comments

Comments
 (0)