Skip to content

Commit d2ab99d

Browse files
author
afoucret
committed
Implement rerank using multi-value fields.
1 parent 5d707e6 commit d2ab99d

File tree

12 files changed

+548
-505
lines changed

12 files changed

+548
-505
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/resources/rerank.csv-spec

Lines changed: 14 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -64,26 +64,6 @@ book_no:keyword | title:text | author
6464
5327 | War and Peace | Leo Tolstoy | 0.08
6565
;
6666

67-
reranker using a non text fields
68-
required_capability: rerank
69-
required_capability: match_operator_colon
70-
71-
FROM books METADATA _score
72-
| WHERE title:"war and peace" AND author:"Tolstoy"
73-
| RERANK "war and peace" ON ratings WITH { "inference_id" : "test_reranker" }
74-
| EVAL _score=ROUND(_score, 2), ratings = ROUND(ratings, 2)
75-
| SORT _score DESC, book_no ASC
76-
| KEEP book_no, title, ratings, _score
77-
;
78-
79-
book_no:keyword | title:text | ratings:double | _score:double
80-
2776 | The Devil and Other Stories (Oxford World's Classics) | 5.0 | 0.33
81-
4536 | War and Peace (Signet Classics) | 4.75 | 0.25
82-
5327 | War and Peace | 3.84 | 0.06
83-
9032 | War and Peace: A Novel (6 Volumes) | 3.81 | 0.06
84-
;
85-
86-
8767
reranker using a sparse input field
8868
required_capability: rerank
8969
required_capability: match_operator_colon
@@ -118,33 +98,12 @@ FROM books METADATA _score
11898
;
11999

120100
book_no:keyword | title:text | author:text | _score:double
121-
5327 | War and Peace | Leo Tolstoy | 0.02
122-
2776 | The Devil and Other Stories (Oxford World's Classics) | Leo Tolstoy | 0.01
123-
4536 | War and Peace (Signet Classics) | [John Hockenberry, Leo Tolstoy, Pat Conroy] | 0.01
124-
9032 | War and Peace: A Novel (6 Volumes) | Tolstoy Leo | 0.01
125-
;
126-
127-
128-
reranker using multiple fields with some non text fields
129-
required_capability: rerank
130-
required_capability: match_operator_colon
131-
132-
FROM books METADATA _score
133-
| WHERE title:"war and peace" AND author:"Tolstoy"
134-
| RERANK "war and peace" ON title, ratings WITH { "inference_id" : "test_reranker" }
135-
| EVAL _score=ROUND(_score, 2), ratings = ROUND(ratings, 2)
136-
| SORT _score DESC, book_no ASC
137-
| KEEP book_no, title, ratings, _score
138-
;
139-
140-
book_no:keyword | title:text | ratings:double | _score:double
141-
4536 | War and Peace (Signet Classics) | 4.75 | 0.02
142-
5327 | War and Peace | 3.84 | 0.02
143-
2776 | The Devil and Other Stories (Oxford World's Classics) | 5.0 | 0.01
144-
9032 | War and Peace: A Novel (6 Volumes) | 3.81 | 0.01
101+
4536 | War and Peace (Signet Classics) | [John Hockenberry, Leo Tolstoy, Pat Conroy] | 0.1
102+
2776 | The Devil and Other Stories (Oxford World's Classics) | Leo Tolstoy | 0.09
103+
5327 | War and Peace | Leo Tolstoy | 0.09
104+
9032 | War and Peace: A Novel (6 Volumes) | Tolstoy Leo | 0.09
145105
;
146106

147-
148107
reranker after a limit
149108
required_capability: rerank
150109
required_capability: match_operator_colon
@@ -263,17 +222,17 @@ FROM books METADATA _score
263222
| SORT _score DESC
264223
| LIMIT 100
265224
| RERANK rerank_score = "hobbit" ON description, author WITH { "inference_id" : "test_reranker" }
266-
| SORT rerank_score
225+
| SORT rerank_score DESC, book_no DESC
267226
| LIMIT 3
268227
| KEEP title, _score, rerank_score
269228
// end::two-queries[]
270229
;
271230

272231
// tag::two-queries-result[]
273-
title:text | _score:double | rerank_score:double
274-
Return of the Shadow | 3.4218082427978516 | 5.740527994930744E-4
275-
Return of the King Being the Third Part of The Lord of the Rings | 2.8398752212524414 | 9.000900317914784E-4
276-
The Lays of Beleriand | 1.5629040002822876 | 9.36329597607255E-4
232+
title:text | _score:double | rerank_score:double
233+
The Lord of the Rings Poster Collection: Six Paintings by Alan Lee (No. 1) | 0.7976278 | 0.125
234+
A Middle English Reader and Vocabulary | 1.043297 | 0.07692308
235+
FINAL WITNESS | 1.405817 | 0.07692308
277236
// end::two-queries-result[]
278237
;
279238

@@ -286,7 +245,7 @@ FROM books METADATA _score
286245
| WHERE MATCH(description, "hobbit") OR MATCH(author, "Tolkien")
287246
| SORT _score DESC
288247
| LIMIT 100
289-
| RERANK rerank_score = "hobbit" ON description, author WITH { "inference_id" : "test_reranker" }
248+
| RERANK rerank_score = "hobbit" ON title, description WITH { "inference_id" : "test_reranker" }
290249
| EVAL original_score = _score, _score = rerank_score + original_score
291250
| SORT _score
292251
| LIMIT 3
@@ -295,10 +254,10 @@ FROM books METADATA _score
295254
;
296255

297256
// tag::combine-result[]
298-
title:text | _score:double | rerank_score:double | rerank_score:double
299-
Poems from the Hobbit | 4.012462615966797 | 0.001396648003719747 | 0.001396648003719747
300-
The Lord of the Rings - Boxed Set | 3.768855094909668 | 0.0010020040208473802 | 0.001396648003719747
301-
Return of the King Being the Third Part of The Lord of the Rings | 3.6248698234558105 | 9.000900317914784E-4 | 0.001396648003719747
257+
title:text | _score:double | rerank_score:double
258+
The Lord of the Rings Poster Collection: Six Paintings by Alan Lee (No. 1) | 0.7976278 | 0.125
259+
A Middle English Reader and Vocabulary | 1.043297 | 0.07692308
260+
FINAL WITNESS | 1.405817 | 0.07692308
302261
// end::combine-result[]
303262
;
304263

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/XContentRowEncoder.java

Lines changed: 0 additions & 167 deletions
This file was deleted.

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/rerank/RerankOperator.java

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
import org.elasticsearch.xpack.esql.inference.InferenceOperator;
1414
import org.elasticsearch.xpack.esql.inference.InferenceService;
1515

16+
import java.util.List;
17+
1618
/**
1719
* {@link RerankOperator} is an {@link InferenceOperator} that computes relevance scores for rows using a reranking model.
1820
* It evaluates a row encoder expression for each input row, batches them together, and sends them to the reranking service
@@ -29,27 +31,27 @@ public class RerankOperator extends InferenceOperator {
2931
/**
3032
* Constructs a new {@code RerankOperator}.
3133
*
32-
* @param driverContext The driver context.
33-
* @param inferenceService The inference service to use for executing inference requests.
34-
* @param inferenceId The ID of the reranking model to invoke.
35-
* @param queryText The query text to use for reranking.
36-
* @param inputEvaluator Evaluator for computing reranked text from input rows.
37-
* @param scoreChannel The output channel where the relevance scores will be written.
38-
* @param batchSize The number of rows to include in each inference request batch.
34+
* @param driverContext The driver context.
35+
* @param inferenceService The inference service to use for executing inference requests.
36+
* @param inferenceId The ID of the reranking model to invoke.
37+
* @param queryText The query text to use for reranking.
38+
* @param inputEvaluators Evaluators for computing the texts to rerank from input rows.
39+
* @param scoreChannel The output channel where the relevance scores will be written.
40+
* @param batchSize The number of rows to include in each inference request batch.
3941
*/
4042
RerankOperator(
4143
DriverContext driverContext,
4244
InferenceService inferenceService,
4345
String inferenceId,
4446
String queryText,
45-
ExpressionEvaluator inputEvaluator,
47+
ExpressionEvaluator[] inputEvaluators,
4648
int scoreChannel,
4749
int batchSize
4850
) {
4951
super(
5052
driverContext,
5153
inferenceService,
52-
new RerankRequestIterator.Factory(inferenceId, queryText, inputEvaluator, batchSize),
54+
new RerankRequestIterator.Factory(inferenceId, queryText, inputEvaluators, batchSize),
5355
new RerankOutputBuilder(driverContext.blockFactory(), scoreChannel)
5456
);
5557
this.queryText = queryText;
@@ -67,10 +69,11 @@ public record Factory(
6769
InferenceService inferenceService,
6870
String inferenceId,
6971
String queryText,
70-
ExpressionEvaluator.Factory rowEncoderFactory,
72+
List<ExpressionEvaluator.Factory> inputEvaluatorFactories,
7173
int scoreChannel,
7274
int batchSize
7375
) implements OperatorFactory {
76+
7477
@Override
7578
public String describe() {
7679
return "RerankOperator[inference_id=[" + inferenceId + "], query=[" + queryText + "], score_channel=[" + scoreChannel + "]]";
@@ -83,11 +86,16 @@ public Operator get(DriverContext driverContext) {
8386
inferenceService,
8487
inferenceId,
8588
queryText,
86-
rowEncoderFactory().get(driverContext),
89+
inputEvaluators(driverContext),
8790
scoreChannel,
8891
batchSize
8992
);
9093
}
91-
}
9294

95+
protected ExpressionEvaluator[] inputEvaluators(DriverContext driverContext) {
96+
return inputEvaluatorFactories.stream()
97+
.map(evaluatorFactory -> evaluatorFactory.get(driverContext))
98+
.toArray(ExpressionEvaluator[]::new);
99+
}
100+
}
93101
}

0 commit comments

Comments
 (0)