Skip to content

Commit 01425df

Browse files
authored
Omit maxScoreCollector for field collapsing when sort by score descending (opensearch-project#19181)
* Omit maxScoreCollector for field collapsing when sort by score descending Signed-off-by: Binlong Gao <[email protected]> * Modify change log Signed-off-by: Binlong Gao <[email protected]> * Add yaml rest test Signed-off-by: Binlong Gao <[email protected]> --------- Signed-off-by: Binlong Gao <[email protected]>
1 parent 63eeb0d commit 01425df

File tree

4 files changed

+160
-11
lines changed

4 files changed

+160
-11
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
3939
- Replace centos:8 with almalinux:8 since centos docker images are deprecated ([#19154](https://github.com/opensearch-project/OpenSearch/pull/19154))
4040
- Add CompletionStage variants to IndicesAdminClient as an alternative to ActionListener ([#19161](https://github.com/opensearch-project/OpenSearch/pull/19161))
4141
- Remove cap on Java version used by forbidden APIs ([#19163](https://github.com/opensearch-project/OpenSearch/pull/19163))
42+
- Omit maxScoreCollector for field collapsing when sort by score descending ([#19181](https://github.com/opensearch-project/OpenSearch/pull/19181))
4243
- Disable pruning for `doc_values` for the wildcard field mapper ([#18568](https://github.com/opensearch-project/OpenSearch/pull/18568))
4344
- Make all methods in Engine.Result public ([#19276](https://github.com/opensearch-project/OpenSearch/pull/19275))
4445
- Create and attach interclusterTest and yamlRestTest code coverage reports to gradle check task([#19165](https://github.com/opensearch-project/OpenSearch/pull/19165))

rest-api-spec/src/main/resources/rest-api-spec/test/search/400_max_score.yml

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,71 @@ teardown:
9191
- match: { hits.total: 2 }
9292
- length: { hits.hits: 2 }
9393
- match: { hits.max_score: null }
94+
95+
96+
# related issue: https://github.com/opensearch-project/OpenSearch/issues/19179
97+
---
98+
"Test max score with sorting on score primarily for field collapsing":
99+
- skip:
100+
version: " - 3.2.99"
101+
reason: Fixed in 3.3.0
102+
103+
- do:
104+
search:
105+
rest_total_hits_as_int: true
106+
index: test_1
107+
body:
108+
query: { term: { foo: bar} }
109+
collapse: { field: foo }
110+
sort: [{ _score: desc }, { _doc: desc }]
111+
- match: { hits.total: 2 }
112+
- length: { hits.hits: 1 }
113+
- match: { hits.max_score: 1.0 }
114+
115+
- do:
116+
search:
117+
rest_total_hits_as_int: true
118+
index: test_1
119+
body:
120+
query: { term: { foo: bar} }
121+
collapse: { field: foo }
122+
sort: [{ _score: asc }, { _doc: desc }]
123+
- match: { hits.total: 2 }
124+
- length: { hits.hits: 1 }
125+
- match: { hits.max_score: null }
126+
127+
---
128+
"Test max score with sorting on score firstly primarily for field collapsing with concurrent segment search enabled":
129+
- skip:
130+
version: " - 3.2.99"
131+
reason: Fixed in 3.3.0
132+
133+
- do:
134+
indices.put_settings:
135+
index: test_1
136+
body:
137+
index.search.concurrent_segment_search.mode: 'all'
138+
139+
- do:
140+
search:
141+
rest_total_hits_as_int: true
142+
index: test_1
143+
body:
144+
query: { term: { foo: bar} }
145+
collapse: { field: foo }
146+
sort: [{ _score: desc }, { _doc: desc }]
147+
- match: { hits.total: 2 }
148+
- length: { hits.hits: 1 }
149+
- match: { hits.max_score: 1.0 }
150+
151+
- do:
152+
search:
153+
rest_total_hits_as_int: true
154+
index: test_1
155+
body:
156+
query: { term: { foo: bar} }
157+
collapse: { field: foo }
158+
sort: [{ _score: asc }, { _doc: desc }]
159+
- match: { hits.total: 2 }
160+
- length: { hits.hits: 1 }
161+
- match: { hits.max_score: null }

server/src/main/java/org/opensearch/search/query/TopDocsCollectorContext.java

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
import java.util.Arrays;
8787
import java.util.Collection;
8888
import java.util.Objects;
89+
import java.util.function.Function;
8990
import java.util.function.Supplier;
9091

9192
import static org.opensearch.search.profile.query.CollectorResult.REASON_SEARCH_COUNT;
@@ -228,10 +229,11 @@ static class CollapsingTopDocsCollectorContext extends TopDocsCollectorContext {
228229
private final DocValueFormat[] sortFmt;
229230
private final CollapsingTopDocsCollector<?> topDocsCollector;
230231
private final Collector collector;
231-
private final Supplier<Float> maxScoreSupplier;
232+
private final Function<CollapseTopFieldDocs, Float> maxScoreSupplier;
232233
private final CollapseContext collapseContext;
233234
private final boolean trackMaxScore;
234235
private final Sort sort;
236+
private final boolean sortByScore;
235237
private final FieldDoc searchAfter;
236238

237239
/**
@@ -274,14 +276,24 @@ private CollapsingTopDocsCollectorContext(
274276
this.searchAfter = searchAfter;
275277
this.topDocsCollector = collapseContext.createTopDocs(sort, numHits, searchAfter);
276278
this.trackMaxScore = trackMaxScore;
279+
this.sortByScore = sortAndFormats == null || SortField.FIELD_SCORE.equals(sortAndFormats.sort.getSort()[0]);
277280

278-
MaxScoreCollector maxScoreCollector;
279-
if (trackMaxScore) {
281+
final MaxScoreCollector maxScoreCollector;
282+
if (sortByScore) {
283+
maxScoreCollector = null;
284+
maxScoreSupplier = (topDocs) -> {
285+
if (topDocs.scoreDocs.length == 0) {
286+
return Float.NaN;
287+
} else {
288+
return topDocs.scoreDocs[0].score;
289+
}
290+
};
291+
} else if (trackMaxScore) {
280292
maxScoreCollector = new MaxScoreCollector();
281-
maxScoreSupplier = maxScoreCollector::getMaxScore;
293+
maxScoreSupplier = (topDocs) -> maxScoreCollector.getMaxScore();
282294
} else {
283295
maxScoreCollector = null;
284-
maxScoreSupplier = () -> Float.NaN;
296+
maxScoreSupplier = (topDocs) -> Float.NaN;
285297
}
286298

287299
this.collector = MultiCollector.wrap(topDocsCollector, maxScoreCollector);
@@ -296,7 +308,7 @@ Collector create(Collector in) throws IOException {
296308
@Override
297309
void postProcess(QuerySearchResult result) throws IOException {
298310
final CollapseTopFieldDocs topDocs = topDocsCollector.getTopDocs();
299-
result.topDocs(new TopDocsAndMaxScore(topDocs, maxScoreSupplier.get()), sortFmt);
311+
result.topDocs(new TopDocsAndMaxScore(topDocs, maxScoreSupplier.apply(topDocs)), sortFmt);
300312
}
301313

302314
@Override
@@ -305,8 +317,8 @@ CollectorManager<?, ReduceableSearchResult> createManager(CollectorManager<?, Re
305317
@Override
306318
public Collector newCollector() throws IOException {
307319
MaxScoreCollector maxScoreCollector = null;
308-
309-
if (trackMaxScore) {
320+
// if sort by score in descending order, MaxScoreCollector is not needed
321+
if (!sortByScore && trackMaxScore) {
310322
maxScoreCollector = new MaxScoreCollector();
311323
}
312324

@@ -353,7 +365,15 @@ protected ReduceableSearchResult reduceWith(final Collection<CollapseTopFieldDoc
353365
numHits,
354366
topFieldDocs.toArray(new CollapseTopFieldDocs[0])
355367
);
356-
result.topDocs(new TopDocsAndMaxScore(topDocs, maxScore), sortFmt);
368+
TopDocsAndMaxScore topDocsAndMaxScore;
369+
// if sort by score in descending order, we can get the max score from the first matched document directly
370+
// if no matched document, max score is Float.NaN
371+
if (sortByScore && topDocs.scoreDocs.length > 0) {
372+
topDocsAndMaxScore = new TopDocsAndMaxScore(topDocs, topDocs.scoreDocs[0].score);
373+
} else {
374+
topDocsAndMaxScore = new TopDocsAndMaxScore(topDocs, maxScore);
375+
}
376+
result.topDocs(topDocsAndMaxScore, sortFmt);
357377
};
358378
}
359379
}
@@ -835,13 +855,12 @@ public static TopDocsCollectorContext createTopDocsCollectorContext(SearchContex
835855
hasFilterCollector
836856
);
837857
} else if (searchContext.collapse() != null) {
838-
boolean trackScores = searchContext.sort() == null ? true : searchContext.trackScores();
839858
int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);
840859
return new CollapsingTopDocsCollectorContext(
841860
searchContext.collapse(),
842861
searchContext.sort(),
843862
numDocs,
844-
trackScores,
863+
searchContext.trackScores(),
845864
searchContext.searchAfter()
846865
);
847866
} else {

server/src/test/java/org/opensearch/search/query/QueryPhaseTests.java

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,6 +1088,67 @@ public void testMaxScore() throws Exception {
10881088
dir.close();
10891089
}
10901090

1091+
public void testMaxScoreWithSortOnScoreAndCollapsingResults() throws Exception {
1092+
Directory dir = newDirectory();
1093+
IndexWriterConfig iwc = newIndexWriterConfig();
1094+
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
1095+
1096+
// Always end up with uneven buckets so collapsing is predictable
1097+
final int numDocs = 2 * scaledRandomIntBetween(600, 900) - 1;
1098+
for (int i = 0; i < numDocs; i++) {
1099+
Document doc = new Document();
1100+
doc.add(new StringField("foo", "bar", Store.NO));
1101+
doc.add(new NumericDocValuesField("user", i & 1));
1102+
w.addDocument(doc);
1103+
}
1104+
w.close();
1105+
1106+
IndexReader reader = DirectoryReader.open(dir);
1107+
QueryShardContext queryShardContext = mock(QueryShardContext.class);
1108+
when(queryShardContext.fieldMapper("user")).thenReturn(
1109+
new NumberFieldType("user", NumberType.INTEGER, true, false, true, false, null, Collections.emptyMap())
1110+
);
1111+
1112+
TestSearchContext context = new TestSearchContext(queryShardContext, indexShard, newContextSearcher(reader, executor));
1113+
context.collapse(new CollapseBuilder("user").build(context.getQueryShardContext()));
1114+
context.trackScores(false);
1115+
context.parsedQuery(new ParsedQuery(new TermQuery(new Term("foo", "bar"))));
1116+
context.setTask(new SearchShardTask(123L, "", "", "", null, Collections.emptyMap()));
1117+
context.setSize(2);
1118+
context.trackTotalHitsUpTo(5);
1119+
1120+
QueryPhase.executeInternal(context.withCleanQueryResult(), queryPhaseSearcher);
1121+
assertFalse(Float.isNaN(context.queryResult().getMaxScore()));
1122+
assertEquals(2, context.queryResult().topDocs().topDocs.scoreDocs.length);
1123+
assertThat(context.queryResult().topDocs().topDocs.totalHits.value(), equalTo((long) numDocs));
1124+
assertThat(context.queryResult().topDocs().topDocs, instanceOf(CollapseTopFieldDocs.class));
1125+
1126+
CollapseTopFieldDocs topDocs = (CollapseTopFieldDocs) context.queryResult().topDocs().topDocs;
1127+
assertThat(topDocs.collapseValues.length, equalTo(2));
1128+
assertThat(topDocs.collapseValues[0], equalTo(0L)); // user == 0
1129+
assertThat(topDocs.collapseValues[1], equalTo(1L)); // user == 1
1130+
1131+
assertThat(context.queryResult().topDocs().topDocs.scoreDocs[0].score, equalTo(context.queryResult().getMaxScore()));
1132+
1133+
Sort sort = new Sort(new SortField(null, SortField.Type.SCORE), new SortField(null, SortField.Type.DOC));
1134+
SortAndFormats sortAndFormats = new SortAndFormats(sort, new DocValueFormat[] { DocValueFormat.RAW, DocValueFormat.RAW });
1135+
context.sort(sortAndFormats);
1136+
QueryPhase.executeInternal(context.withCleanQueryResult(), queryPhaseSearcher);
1137+
assertFalse(Float.isNaN(context.queryResult().getMaxScore()));
1138+
assertEquals(2, context.queryResult().topDocs().topDocs.scoreDocs.length);
1139+
assertThat(context.queryResult().topDocs().topDocs.totalHits.value(), equalTo((long) numDocs));
1140+
assertThat(context.queryResult().topDocs().topDocs, instanceOf(CollapseTopFieldDocs.class));
1141+
1142+
topDocs = (CollapseTopFieldDocs) context.queryResult().topDocs().topDocs;
1143+
assertThat(topDocs.collapseValues.length, equalTo(2));
1144+
assertThat(topDocs.collapseValues[0], equalTo(0L)); // user == 0
1145+
assertThat(topDocs.collapseValues[1], equalTo(1L)); // user == 1
1146+
assertThat(context.queryResult().topDocs().topDocs.scoreDocs[0].score, equalTo(context.queryResult().getMaxScore()));
1147+
1148+
reader.close();
1149+
dir.close();
1150+
}
1151+
10911152
public void testCollapseQuerySearchResults() throws Exception {
10921153
Directory dir = newDirectory();
10931154
final Sort sort = new Sort(new SortField("user", SortField.Type.INT));

0 commit comments

Comments
 (0)