Skip to content

Commit 4cd76c7

Browse files
Use ScriptScoreQuery and KnnScoreDocQuery in ScriptRescorer
1 parent e551c8a commit 4cd76c7

File tree

13 files changed: 487 additions and 349 deletions.
Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,277 @@
1+
setup:
2+
- requires:
3+
cluster_features: [ "search.rescore.script" ]
4+
reason: "Test requires feature 'search.rescore.script' to test script rescore"
5+
test_runner_features: "close_to"
6+
- skip:
7+
reason: "contains is a newly added assertion"
8+
features: contains
9+
10+
- do:
11+
indices.create:
12+
index: books
13+
body:
14+
settings:
15+
number_of_shards: 1
16+
mappings:
17+
properties:
18+
title:
19+
type: text
20+
author:
21+
type: keyword
22+
num_likes:
23+
type: integer
24+
25+
- do:
26+
bulk:
27+
index: books
28+
refresh: true
29+
body:
30+
- '{"index": {"_id": "1"}}'
31+
- '{"title": "The Ethics of Ambiguity", "author": "Simone de Beauvoir", "num_likes": 150}'
32+
- '{"index": {"_id": "2"}}'
33+
- '{"title": "The Ethics of Being and Nothingness", "author": "Jean-Paul Sartre", "num_likes": 250}'
34+
- '{"index": {"_id": "3"}}'
35+
- '{"title": "The Ambiguity of The Second Sex", "author": "Simone de Beauvoir", "num_likes": 300}'
36+
- '{"index": {"_id": "4"}}'
37+
- '{"title": "The Ethics of Nausea", "author": "Jean-Paul Sartre", "num_likes": 180}'
38+
- '{"index": {"_id": "5"}}'
39+
- '{"title": "The Ambiguity in the Myth of Sisyphus", "author": "Albert Camus", "num_likes": 220}'
40+
41+
---
42+
"Basic script rescore":
43+
44+
# simple rescore script for all top docs
45+
- do:
46+
search:
47+
index: books
48+
body:
49+
query:
50+
match:
51+
title: "ethics of ambiguity"
52+
rescore:
53+
window_size: 5
54+
script:
55+
script:
56+
source: "doc['num_likes'].value * params.multiplier"
57+
params:
58+
multiplier: 10
59+
- match: { hits.total.value: 5 }
60+
- match: { hits.hits.0._id: "3" }
61+
- close_to: { hits.hits.0._score: {value: 3000, error: 0.0001} }
62+
- match: { hits.hits.1._id: "2" }
63+
- close_to: { hits.hits.1._score: {value: 2500, error: 0.0001} }
64+
- match: { hits.hits.2._id: "5" }
65+
- close_to: { hits.hits.2._score: {value: 2200, error: 0.0001} }
66+
- match: { hits.hits.3._id: "4" }
67+
- close_to: { hits.hits.3._score: {value: 1800, error: 0.0001} }
68+
- match: { hits.hits.4._id: "1" }
69+
- close_to: { hits.hits.4._score: {value: 1500, error: 0.0001} }
70+
71+
# rescore script with _score for all top docs
72+
- do:
73+
search:
74+
index: books
75+
body:
76+
query:
77+
match:
78+
title: "ethics of ambiguity"
79+
rescore:
80+
window_size: 5
81+
script:
82+
script:
83+
source: "doc['num_likes'].value * params.multiplier + _score"
84+
params:
85+
multiplier: 10
86+
- match: { hits.hits.0._id: "3" }
87+
- close_to: { hits.hits.0._score: { value: 3000.5989, error: 0.0001 } }
88+
- match: { hits.hits.1._id: "2" }
89+
- close_to: { hits.hits.1._score: { value: 2500.5989, error: 0.0001 } }
90+
- match: { hits.hits.2._id: "5" }
91+
- close_to: { hits.hits.2._score: { value: 2200.5583, error: 0.0001 } }
92+
- match: { hits.hits.3._id: "4" }
93+
- close_to: { hits.hits.3._score: { value: 1800.7003, error: 0.0001 } }
94+
- match: { hits.hits.4._id: "1" }
95+
- close_to: { hits.hits.4._score: { value: 1501.3032, error: 0.0001 } }
96+
97+
# simple rescore script for small window size
98+
- do:
99+
search:
100+
index: books
101+
body:
102+
query:
103+
match:
104+
title: "ethics of ambiguity"
105+
rescore:
106+
window_size: 3
107+
script:
108+
script:
109+
source: "doc['num_likes'].value * params.multiplier"
110+
params:
111+
multiplier: 10
112+
- match: { hits.hits.0._id: "2" }
113+
- close_to: { hits.hits.0._score: { value: 2500, error: 0.0001 } }
114+
- match: { hits.hits.1._id: "4" }
115+
- close_to: { hits.hits.1._score: { value: 1800, error: 0.0001 } }
116+
- match: { hits.hits.2._id: "1" }
117+
- close_to: { hits.hits.2._score: { value: 1500, error: 0.0001 } }
118+
- match: { hits.hits.3._id: "3" }
119+
- close_to: { hits.hits.3._score: { value: 0.59879, error: 0.0001 } }
120+
- match: { hits.hits.4._id: "5" }
121+
- close_to: { hits.hits.4._score: { value: 0.5583, error: 0.0001 } }
122+
123+
# rescore script with _score for small window size
124+
- do:
125+
search:
126+
index: books
127+
body:
128+
query:
129+
match:
130+
title: "ethics of ambiguity"
131+
rescore:
132+
window_size: 3
133+
script:
134+
script:
135+
source: "doc['num_likes'].value * params.multiplier + _score"
136+
params:
137+
multiplier: 10
138+
- match: { hits.hits.0._id: "2" }
139+
- close_to: { hits.hits.0._score: { value: 2500.5989, error: 0.0001 } }
140+
- match: { hits.hits.1._id: "4" }
141+
- close_to: { hits.hits.1._score: { value: 1800.7003, error: 0.0001 } }
142+
- match: { hits.hits.2._id: "1" }
143+
- close_to: { hits.hits.2._score: { value: 1501.3032, error: 0.0001 } }
144+
- match: { hits.hits.3._id: "3" }
145+
- close_to: { hits.hits.3._score: { value: 0.59879, error: 0.0001 } }
146+
- match: { hits.hits.4._id: "5" }
147+
- close_to: { hits.hits.4._score: { value: 0.5583, error: 0.0001 } }
148+
149+
---
150+
"Multiple script rescore":
151+
- do:
152+
search:
153+
index: books
154+
body:
155+
query:
156+
match:
157+
title: "ethics of ambiguity"
158+
rescore:
159+
- window_size: 3
160+
script:
161+
script:
162+
source: "doc['num_likes'].value * params.multiplier + _score"
163+
params:
164+
multiplier: 10
165+
- window_size: 2
166+
script:
167+
script:
168+
source: "_score * params.factor"
169+
params:
170+
factor: 10
171+
- match: { hits.hits.0._id: "2" }
172+
- close_to: { hits.hits.0._score: { value: 25005.989, error: 0.001 } } # rescored by 1st and 2nd scripts
173+
- match: { hits.hits.1._id: "4" }
174+
- close_to: { hits.hits.1._score: { value: 18007.0039, error: 0.001 } } # rescored by 1st and 2nd scripts
175+
- match: { hits.hits.2._id: "1" }
176+
- close_to: { hits.hits.2._score: { value: 1501.3032, error: 0.0001 } } # rescored by 1st script
177+
- match: { hits.hits.3._id: "3" }
178+
- close_to: { hits.hits.3._score: { value: 0.59879, error: 0.0001 } } # not rescored
179+
- match: { hits.hits.4._id: "5" }
180+
- close_to: { hits.hits.4._score: { value: 0.5583, error: 0.0001 } } # not rescored
181+
182+
---
183+
"Rescore Script With Explanation":
184+
- do:
185+
search:
186+
index: books
187+
explain: true
188+
body:
189+
query:
190+
match:
191+
title: "ethics of ambiguity"
192+
rescore:
193+
window_size: 3
194+
script:
195+
script:
196+
source: "doc['num_likes'].value * params.multiplier + _score"
197+
params:
198+
multiplier: 10
199+
- match: { hits.total.value: 5 }
200+
201+
# hit went through rescore
202+
- contains: { hits.hits.0._explanation.description: "script score function" }
203+
- close_to: { hits.hits.0._explanation.value: { value: 2500.5989, error: 0.0001 } }
204+
- match: { hits.hits.0._explanation.details.0.description: '_score: ' }
205+
- close_to: { hits.hits.0._explanation.details.0.value: { value: 0.5987902, error: 0.0001 } }
206+
207+
# hit did not go through rescore
208+
- match: { hits.hits.4._explanation.description: "sum of:" }
209+
- close_to: { hits.hits.4._explanation.value: { value: 0.5583, error: 0.0001 } }
210+
211+
212+
---
213+
"Script rescore on Multiple Segments":
214+
# update some documents to create multiple segments
215+
- do:
216+
bulk:
217+
index: books
218+
refresh: true
219+
body:
220+
- '{"update": {"_id": "1"}}'
221+
- '{"doc": {"num_likes": 300}}'
222+
- '{"update": {"_id": "3"}}'
223+
- '{"doc": {"num_likes": 150}}'
224+
225+
- do:
226+
search:
227+
index: books
228+
body:
229+
query:
230+
match:
231+
title: "ethics of ambiguity"
232+
rescore:
233+
window_size: 5
234+
script:
235+
script:
236+
source: "doc['num_likes'].value * params.multiplier + _score"
237+
params:
238+
multiplier: 10
239+
- match: { hits.total.value: 5 }
240+
- match: { hits.hits.0._id: "1" }
241+
- close_to: { hits.hits.0._score: { value: 3001.1267, error: 0.0001 } }
242+
- match: { hits.hits.1._id: "2" }
243+
- close_to: { hits.hits.1._score: { value: 2500.6064, error: 0.0001 } }
244+
- match: { hits.hits.2._id: "5" }
245+
- close_to: { hits.hits.2._score: { value: 2200.3877, error: 0.0001 } }
246+
- match: { hits.hits.3._id: "4" }
247+
- close_to: { hits.hits.3._score: { value: 1800.7106, error: 0.0001 } }
248+
- match: { hits.hits.4._id: "3" }
249+
- close_to: { hits.hits.4._score: { value: 1500.4163, error: 0.0001 } }
250+
251+
# rescore script with _score for small window size
252+
- do:
253+
search:
254+
index: books
255+
body:
256+
query:
257+
match:
258+
title: "ethics of ambiguity"
259+
rescore:
260+
window_size: 3
261+
script:
262+
script:
263+
source: "doc['num_likes'].value * params.multiplier + _score"
264+
params:
265+
multiplier: 10
266+
- match: { hits.total.value: 5 }
267+
- match: { hits.hits.0._id: "1" }
268+
- close_to: { hits.hits.0._score: { value: 3001.1267, error: 0.0001 } }
269+
- match: { hits.hits.1._id: "2" }
270+
- close_to: { hits.hits.1._score: { value: 2500.6064, error: 0.0001 } }
271+
- match: { hits.hits.2._id: "4" }
272+
- close_to: { hits.hits.2._score: { value: 1800.7106, error: 0.0001 } }
273+
- match: { hits.hits.3._id: "3" }
274+
- close_to: { hits.hits.3._score: { value: 0.41622, error: 0.0001 } }
275+
- match: { hits.hits.4._id: "5" }
276+
- close_to: { hits.hits.4._score: { value: 0.38778, error: 0.0001 } }
277+

server/src/main/java/org/elasticsearch/TransportVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,7 @@ static TransportVersion def(int id) {
365365
public static final TransportVersion INDEX_TEMPLATE_TRACKING_INFO = def(9_136_0_00);
366366
public static final TransportVersion EXTENDED_SNAPSHOT_STATS_IN_NODE_INFO = def(9_137_0_00);
367367
public static final TransportVersion SIMULATE_INGEST_MAPPING_MERGE_TYPE = def(9_138_0_00);
368+
public static final TransportVersion SCRIPT_RESCORER = def(9_139_0_00);
368369

369370
/*
370371
* STOP! READ THIS FIRST! No, really,

server/src/main/java/org/elasticsearch/common/lucene/search/function/ScriptScoreQuery.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,4 +386,12 @@ public long cost() {
386386

387387
}
388388

389+
/**
 * Exposes the result of {@code scriptBuilder.needs_score()} to callers of this query.
 * NOTE(review): presumably this indicates whether the script reads {@code _score};
 * confirm against the script builder's contract.
 *
 * @return the value returned by {@code scriptBuilder.needs_score()}
 */
public boolean needsScore() {
390+
return scriptBuilder.needs_score();
391+
}
392+
393+
/**
 * Builds a new {@code ScriptScoreQuery} from {@code newSubQuery} together with this instance's
 * {@code script}, {@code scriptBuilder}, {@code lookup}, {@code minScore}, {@code indexName},
 * {@code shardId} and {@code indexVersion}.
 *
 * @param newSubQuery the query passed to the constructor in place of this instance's sub-query
 * @return a newly constructed {@code ScriptScoreQuery}
 */
public ScriptScoreQuery cloneWithNewSubQuery(Query newSubQuery) {
394+
return new ScriptScoreQuery(newSubQuery, script, scriptBuilder, lookup, minScore, indexName, shardId, indexVersion);
395+
}
396+
389397
}

server/src/main/java/org/elasticsearch/search/SearchFeatures.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ public Set<NodeFeature> getFeatures() {
3333
static final NodeFeature MULTI_MATCH_CHECKS_POSITIONS = new NodeFeature("search.multi.match.checks.positions");
3434
public static final NodeFeature BBQ_HNSW_DEFAULT_INDEXING = new NodeFeature("search.vectors.mappers.default_bbq_hnsw");
3535
public static final NodeFeature SEARCH_WITH_NO_DIMENSIONS_BUGFIX = new NodeFeature("search.vectors.no_dimensions_bugfix");
36+
public static final NodeFeature SEARCH_RESCORE_SCRIPT = new NodeFeature("search.rescore.script");
3637

3738
@Override
3839
public Set<NodeFeature> getTestFeatures() {
@@ -43,7 +44,8 @@ public Set<NodeFeature> getTestFeatures() {
4344
INT_SORT_FOR_INT_SHORT_BYTE_FIELDS,
4445
MULTI_MATCH_CHECKS_POSITIONS,
4546
BBQ_HNSW_DEFAULT_INDEXING,
46-
SEARCH_WITH_NO_DIMENSIONS_BUGFIX
47+
SEARCH_WITH_NO_DIMENSIONS_BUGFIX,
48+
SEARCH_RESCORE_SCRIPT
4749
);
4850
}
4951
}

server/src/main/java/org/elasticsearch/search/SearchModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@
232232
import org.elasticsearch.search.rank.feature.RankFeatureShardResult;
233233
import org.elasticsearch.search.rescore.QueryRescorerBuilder;
234234
import org.elasticsearch.search.rescore.RescorerBuilder;
235+
import org.elasticsearch.search.rescore.ScriptRescorerBuilder;
235236
import org.elasticsearch.search.retriever.KnnRetrieverBuilder;
236237
import org.elasticsearch.search.retriever.RescorerRetrieverBuilder;
237238
import org.elasticsearch.search.retriever.RetrieverBuilder;
@@ -825,6 +826,7 @@ private void registerPipelineAggregation(PipelineAggregationSpec spec) {
825826

826827
private void registerRescorers(List<SearchPlugin> plugins) {
827828
registerRescorer(new RescorerSpec<>(QueryRescorerBuilder.NAME, QueryRescorerBuilder::new, QueryRescorerBuilder::fromXContent));
829+
registerRescorer(new RescorerSpec<>(ScriptRescorerBuilder.NAME, ScriptRescorerBuilder::new, ScriptRescorerBuilder::fromXContent));
828830
registerFromPlugin(plugins, SearchPlugin::getRescorers, this::registerRescorer);
829831
}
830832

server/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,12 @@
1111

1212
import org.apache.lucene.search.Explanation;
1313
import org.apache.lucene.search.IndexSearcher;
14-
import org.apache.lucene.search.ScoreDoc;
1514
import org.apache.lucene.search.TopDocs;
1615
import org.elasticsearch.index.query.ParsedQuery;
1716

1817
import java.io.IOException;
1918
import java.util.Arrays;
2019
import java.util.Collections;
21-
import java.util.Comparator;
2220
import java.util.List;
2321
import java.util.Set;
2422

@@ -61,7 +59,7 @@ protected float combine(float firstPassScore, boolean secondPassMatches, float s
6159
};
6260

6361
// First take top slice of incoming docs, to be rescored:
64-
TopDocs topNFirstPass = topN(topDocs, rescoreContext.getWindowSize());
62+
TopDocs topNFirstPass = Rescorer.topN(topDocs, rescoreContext.getWindowSize());
6563

6664
// Save doc IDs for which rescoring was applied to be used in score explanation
6765
Set<Integer> topNDocIDs = Arrays.stream(topNFirstPass.scoreDocs).map(scoreDoc -> scoreDoc.doc).collect(toUnmodifiableSet());
@@ -118,24 +116,6 @@ public Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreCon
118116
return prim;
119117
}
120118

121-
private static final Comparator<ScoreDoc> SCORE_DOC_COMPARATOR = (o1, o2) -> {
122-
int cmp = Float.compare(o2.score, o1.score);
123-
return cmp == 0 ? Integer.compare(o1.doc, o2.doc) : cmp;
124-
};
125-
126-
/** Returns a new {@link TopDocs} with the topN from the incoming one, or the same TopDocs if the number of hits is already &lt;=
127-
* topN. */
128-
private static TopDocs topN(TopDocs in, int topN) {
129-
if (in.scoreDocs.length < topN) {
130-
return in;
131-
}
132-
133-
ScoreDoc[] subset = new ScoreDoc[topN];
134-
System.arraycopy(in.scoreDocs, 0, subset, 0, topN);
135-
136-
return new TopDocs(in.totalHits, subset);
137-
}
138-
139119
/** Modifies incoming TopDocs (in) by replacing the top hits with resorted's hits, and then resorting all hits. */
140120
private static TopDocs combine(TopDocs in, TopDocs resorted, QueryRescoreContext ctx) {
141121

0 commit comments

Comments
 (0)