Skip to content

Commit d388ff8

Browse files
committed
Update RERANK:
- Not sorting the data anymore
- Output the score in the specified column
- Applied additional logical plan optimizations since the plan is now a generating plan (because we removed the SORT clause)
- Updated tests
1 parent 1a6e5b2 commit d388ff8

File tree

16 files changed

+398
-148
lines changed

16 files changed

+398
-148
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/resources/rerank.csv-spec

Lines changed: 88 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,62 @@
33
// This makes the output more predictable which is helpful here.
44

55

6-
reranker using a single field
6+
reranker using a single field, overwrite existing _score column
77
required_capability: rerank
88
required_capability: match_operator_colon
99

1010
FROM books METADATA _score
1111
| WHERE title:"war and peace" AND author:"Tolstoy"
12+
| SORT _score DESC, book_no ASC
1213
| RERANK "war and peace" ON title WITH inferenceId=test_reranker
13-
| KEEP book_no, title, author
14+
| EVAL _score=ROUND(_score, 2)
15+
| KEEP book_no, title, author, _score
1416
;
1517

16-
book_no:keyword | title:text | author:text
17-
5327 | War and Peace | Leo Tolstoy
18-
4536 | War and Peace (Signet Classics) | [John Hockenberry, Leo Tolstoy, Pat Conroy]
19-
9032 | War and Peace: A Novel (6 Volumes) | Tolstoy Leo
20-
2776 | The Devil and Other Stories (Oxford World's Classics) | Leo Tolstoy
18+
book_no:keyword | title:text | author:text | _score:double
19+
5327 | War and Peace | Leo Tolstoy | 0.08
20+
4536 | War and Peace (Signet Classics) | [John Hockenberry, Leo Tolstoy, Pat Conroy] | 0.03
21+
9032 | War and Peace: A Novel (6 Volumes) | Tolstoy Leo | 0.03
22+
2776 | The Devil and Other Stories (Oxford World's Classics) | Leo Tolstoy | 0.02
23+
;
24+
25+
reranker using a single field, create a new column
26+
required_capability: rerank
27+
required_capability: match_operator_colon
28+
29+
FROM books METADATA _score
30+
| WHERE title:"war and peace" AND author:"Tolstoy"
31+
| SORT _score DESC, book_no ASC
32+
| RERANK "war and peace" ON title WITH inferenceId=test_reranker, scoreColumn=rerank_score
33+
| EVAL _score=ROUND(_score, 2), rerank_score=ROUND(rerank_score, 2)
34+
| KEEP book_no, title, author, rerank_score
35+
;
36+
37+
book_no:keyword | title:text | author:text | rerank_score:double
38+
5327 | War and Peace | Leo Tolstoy | 0.08
39+
4536 | War and Peace (Signet Classics) | [John Hockenberry, Leo Tolstoy, Pat Conroy] | 0.03
40+
9032 | War and Peace: A Novel (6 Volumes) | Tolstoy Leo | 0.03
41+
2776 | The Devil and Other Stories (Oxford World's Classics) | Leo Tolstoy | 0.02
42+
;
43+
44+
reranker using a single field, create a new column, sort by rerank_score
45+
required_capability: rerank
46+
required_capability: match_operator_colon
47+
48+
FROM books METADATA _score
49+
| WHERE title:"war and peace" AND author:"Tolstoy"
50+
| SORT _score DESC
51+
| RERANK "war and peace" ON title WITH inferenceId=test_reranker, scoreColumn=rerank_score
52+
| EVAL _score=ROUND(_score, 2), rerank_score=ROUND(rerank_score, 2)
53+
| SORT rerank_score, _score ASC, book_no ASC
54+
| KEEP book_no, title, author, rerank_score
55+
;
56+
57+
book_no:keyword | title:text | author:text | rerank_score:double
58+
2776 | The Devil and Other Stories (Oxford World's Classics) | Leo Tolstoy | 0.02
59+
9032 | War and Peace: A Novel (6 Volumes) | Tolstoy Leo | 0.03
60+
4536 | War and Peace (Signet Classics) | [John Hockenberry, Leo Tolstoy, Pat Conroy] | 0.03
61+
5327 | War and Peace | Leo Tolstoy | 0.08
2162
;
2263

2364

@@ -27,15 +68,17 @@ required_capability: match_operator_colon
2768

2869
FROM books METADATA _score
2970
| WHERE title:"war and peace" AND author:"Tolstoy"
30-
| RERANK "war and peace" ON title, author inferenceId=test_reranker
31-
| KEEP book_no, title, author
71+
| RERANK "war and peace" ON title, author WITH inferenceId=test_reranker
72+
| EVAL _score=ROUND(_score, 2)
73+
| SORT _score DESC, book_no ASC
74+
| KEEP book_no, title, author, _score
3275
;
3376

34-
book_no:keyword | title:text | author:text
35-
5327 | War and Peace | Leo Tolstoy
36-
9032 | War and Peace: A Novel (6 Volumes) | Tolstoy Leo
37-
2776 | The Devil and Other Stories (Oxford World's Classics) | Leo Tolstoy
38-
4536 | War and Peace (Signet Classics) | [John Hockenberry, Leo Tolstoy, Pat Conroy]
77+
book_no:keyword | title:text | author:text | _score:double
78+
5327 | War and Peace | Leo Tolstoy | 0.02
79+
2776 | The Devil and Other Stories (Oxford World's Classics) | Leo Tolstoy | 0.01
80+
4536 | War and Peace (Signet Classics) | [John Hockenberry, Leo Tolstoy, Pat Conroy] | 0.01
81+
9032 | War and Peace: A Novel (6 Volumes) | Tolstoy Leo | 0.01
3982
;
4083

4184

@@ -45,16 +88,18 @@ required_capability: match_operator_colon
4588

4689
FROM books METADATA _score
4790
| WHERE title:"war and peace" AND author:"Tolstoy"
48-
| SORT _score DESC
91+
| SORT _score DESC, book_no ASC
4992
| LIMIT 3
50-
| RERANK "war and peace" ON title inferenceId=test_reranker
51-
| KEEP book_no, title, author
93+
| RERANK "war and peace" ON title WITH inferenceId=test_reranker
94+
| EVAL _score=ROUND(_score, 2)
95+
| SORT _score DESC, book_no ASC
96+
| KEEP book_no, title, author, _score
5297
;
5398

54-
book_no:keyword | title:text | author:text
55-
5327 | War and Peace | Leo Tolstoy
56-
4536 | War and Peace (Signet Classics) | [John Hockenberry, Leo Tolstoy, Pat Conroy]
57-
9032 | War and Peace: A Novel (6 Volumes) | Tolstoy Leo
99+
book_no:keyword | title:text | author:text | _score:double
100+
5327 | War and Peace | Leo Tolstoy | 0.08
101+
4536 | War and Peace (Signet Classics) | [John Hockenberry, Leo Tolstoy, Pat Conroy] | 0.03
102+
9032 | War and Peace: A Novel (6 Volumes) | Tolstoy Leo | 0.03
58103
;
59104

60105

@@ -64,15 +109,17 @@ required_capability: match_operator_colon
64109

65110
FROM books METADATA _score
66111
| WHERE title:"war and peace" AND author:"Tolstoy"
67-
| RERANK "war and peace" ON title inferenceId=test_reranker
68-
| KEEP book_no, title, author
112+
| RERANK "war and peace" ON title WITH inferenceId=test_reranker
113+
| EVAL _score=ROUND(_score, 2)
114+
| SORT _score DESC, book_no ASC
115+
| KEEP book_no, title, author, _score
69116
| LIMIT 3
70117
;
71118

72-
book_no:keyword | title:text | author:text
73-
5327 | War and Peace | Leo Tolstoy
74-
4536 | War and Peace (Signet Classics) | [John Hockenberry, Leo Tolstoy, Pat Conroy]
75-
9032 | War and Peace: A Novel (6 Volumes) | Tolstoy Leo
119+
book_no:keyword | title:text | author:text | _score:double
120+
5327 | War and Peace | Leo Tolstoy | 0.08
121+
4536 | War and Peace (Signet Classics) | [John Hockenberry, Leo Tolstoy, Pat Conroy] | 0.03
122+
9032 | War and Peace: A Novel (6 Volumes) | Tolstoy Leo | 0.03
76123
;
77124

78125

@@ -82,16 +129,17 @@ required_capability: match_operator_colon
82129

83130
FROM books
84131
| WHERE title:"war and peace" AND author:"Tolstoy"
85-
| RERANK "war and peace" ON title inferenceId=test_reranker
86-
| KEEP book_no, title, author
132+
| RERANK "war and peace" ON title WITH inferenceId=test_reranker
133+
| EVAL _score=ROUND(_score, 2)
134+
| KEEP book_no, title, author, _score
87135
| SORT author, title
88136
| LIMIT 3
89137
;
90138

91-
book_no:keyword | title:text | author:text
92-
4536 | War and Peace (Signet Classics) | [John Hockenberry, Leo Tolstoy, Pat Conroy]
93-
2776 | The Devil and Other Stories (Oxford World's Classics) | Leo Tolstoy
94-
5327 | War and Peace | Leo Tolstoy
139+
book_no:keyword | title:text | author:text | _score:double
140+
4536 | War and Peace (Signet Classics) | [John Hockenberry, Leo Tolstoy, Pat Conroy] | 0.03
141+
2776 | The Devil and Other Stories (Oxford World's Classics) | Leo Tolstoy | 0.02
142+
5327 | War and Peace | Leo Tolstoy | 0.08
95143
;
96144

97145

@@ -105,12 +153,14 @@ FROM books METADATA _id, _index, _score
105153
| FORK ( WHERE title:"Tolkien" | SORT _score, _id DESC | LIMIT 3 )
106154
( WHERE author:"Tolkien" | SORT _score, _id DESC | LIMIT 3 )
107155
| RRF
108-
| RERANK "Tolkien" ON title inferenceId=test_reranker
156+
| RERANK "Tolkien" ON title WITH inferenceId=test_reranker
157+
| EVAL _score=ROUND(_score, 2)
158+
| SORT _score DESC, book_no ASC
109159
| LIMIT 2
110-
| KEEP book_no, title, author
160+
| KEEP book_no, title, author, _score
111161
;
112162

113-
book_no:keyword | title:keyword | author:keyword
114-
5335 | Letters of J R R Tolkien | J.R.R. Tolkien
115-
2130 | The J. R. R. Tolkien Audio Collection | [Christopher Tolkien, John Ronald Reuel Tolkien]
163+
book_no:keyword | title:keyword | author:keyword | _score:double
164+
5335 | Letters of J R R Tolkien | J.R.R. Tolkien | 0.04
165+
2130 | The J. R. R. Tolkien Audio Collection | [Christopher Tolkien, John Ronald Reuel Tolkien] | 0.03
116166
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,9 @@
3939
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownAndCombineLimits;
4040
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownAndCombineOrderBy;
4141
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownAndCombineSample;
42-
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownCompletion;
4342
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownEnrich;
4443
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownEval;
44+
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownInferencePlan;
4545
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownRegexExtract;
4646
import org.elasticsearch.xpack.esql.optimizer.rules.logical.RemoveStatsOverride;
4747
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceAggregateAggExpressionWithEval;
@@ -194,7 +194,7 @@ protected static Batch<LogicalPlan> operators() {
194194
new PushDownAndCombineLimits(),
195195
new PushDownAndCombineFilters(),
196196
new PushDownAndCombineSample(),
197-
new PushDownCompletion(),
197+
new PushDownInferencePlan(),
198198
new PushDownEval(),
199199
new PushDownRegexExtract(),
200200
new PushDownEnrich(),

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineFilters.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
import org.elasticsearch.xpack.esql.plan.logical.Project;
2525
import org.elasticsearch.xpack.esql.plan.logical.RegexExtract;
2626
import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan;
27-
import org.elasticsearch.xpack.esql.plan.logical.inference.Completion;
27+
import org.elasticsearch.xpack.esql.plan.logical.inference.InferencePlan;
2828
import org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin;
2929
import org.elasticsearch.xpack.esql.plan.logical.join.Join;
3030
import org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes;
@@ -72,10 +72,10 @@ protected LogicalPlan rule(Filter filter) {
7272
// Push down filters that do not rely on attributes created by RegexExtract
7373
var attributes = AttributeSet.of(Expressions.asAttributes(re.extractedFields()));
7474
plan = maybePushDownPastUnary(filter, re, attributes::contains, NO_OP);
75-
} else if (child instanceof Completion completion) {
75+
} else if (child instanceof InferencePlan<?> inferencePlan) {
7676
// Push down filters that do not rely on attributes created by the inference plan (e.g. Completion)
77-
var attributes = AttributeSet.of(completion.generatedAttributes());
78-
plan = maybePushDownPastUnary(filter, completion, attributes::contains, NO_OP);
77+
var attributes = AttributeSet.of(inferencePlan.generatedAttributes());
78+
plan = maybePushDownPastUnary(filter, inferencePlan, attributes::contains, NO_OP);
7979
} else if (child instanceof Enrich enrich) {
8080
// Push down filters that do not rely on attributes created by Enrich
8181
var attributes = AttributeSet.of(Expressions.asAttributes(enrich.enrichFields()));

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineLimits.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import org.elasticsearch.xpack.esql.plan.logical.Project;
1818
import org.elasticsearch.xpack.esql.plan.logical.RegexExtract;
1919
import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan;
20-
import org.elasticsearch.xpack.esql.plan.logical.inference.Completion;
20+
import org.elasticsearch.xpack.esql.plan.logical.inference.InferencePlan;
2121
import org.elasticsearch.xpack.esql.plan.logical.join.Join;
2222
import org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes;
2323

@@ -43,7 +43,7 @@ public LogicalPlan rule(Limit limit, LogicalOptimizerContext ctx) {
4343
|| unary instanceof Project
4444
|| unary instanceof RegexExtract
4545
|| unary instanceof Enrich
46-
|| unary instanceof Completion) {
46+
|| unary instanceof InferencePlan<?>) {
4747
return unary.replaceChild(limit.replaceChild(unary.child()));
4848
} else if (unary instanceof MvExpand) {
4949
// MV_EXPAND can increase the number of rows, so we cannot just push the limit down
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@
88
package org.elasticsearch.xpack.esql.optimizer.rules.logical;
99

1010
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
11-
import org.elasticsearch.xpack.esql.plan.logical.inference.Completion;
11+
import org.elasticsearch.xpack.esql.plan.logical.inference.InferencePlan;
1212

13-
public final class PushDownCompletion extends OptimizerRules.OptimizerRule<Completion> {
13+
public final class PushDownInferencePlan extends OptimizerRules.OptimizerRule<InferencePlan<?>> {
1414
@Override
15-
protected LogicalPlan rule(Completion p) {
15+
protected LogicalPlan rule(InferencePlan<?> p) {
1616
return PushDownUtils.pushGeneratingPlanPastProjectAndOrderBy(p);
1717
}
1818
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -745,26 +745,17 @@ public PlanFactory visitRerankCommand(EsqlBaseParser.RerankCommandContext ctx) {
745745
return p -> visitRerankOptions(new Rerank.Builder(source, p, queryText, rerankFields), ctx.commandOptions()).build();
746746
}
747747

748-
private Rerank.Builder visitRerankOptions(Rerank.Builder rerannkBuilder, EsqlBaseParser.CommandOptionsContext ctx) {
748+
private Rerank.Builder visitRerankOptions(Rerank.Builder rerankBuilder, EsqlBaseParser.CommandOptionsContext ctx) {
749749
if (ctx == null) {
750-
return rerannkBuilder;
750+
return rerankBuilder;
751751
}
752752

753753
for (var option : ctx.commandOption()) {
754754
String optionName = visitIdentifier(option.identifier());
755-
if (optionName.equals(Rerank.Builder.INFERENCE_ID_OPTION_NAME)) {
756-
rerannkBuilder.withInferenceId(visitInferenceId(expression(option.primaryExpression())));
757-
} else if (optionName.equals(Rerank.Builder.SCORE_COLUMN_OPTION_NAME)) {
758-
if (expression(option.primaryExpression()) instanceof UnresolvedAttribute scoreAttribute) {
759-
rerannkBuilder.withScoreColumnAttribute(scoreAttribute);
760-
} else {
761-
throw new ParsingException(
762-
source(option.identifier()),
763-
"Option [{}] expects a valid attribute in RERANK command. [{}] provided.",
764-
option.identifier().getText(),
765-
option.primaryExpression().getText()
766-
);
767-
}
755+
if (optionName.equals(Rerank.INFERENCE_ID_OPTION_NAME)) {
756+
rerankBuilder.withInferenceId(visitInferenceId(expression(option.primaryExpression())));
757+
} else if (optionName.equals(Rerank.SCORE_COLUMN_OPTION_NAME)) {
758+
rerankBuilder.withScoreAttribute(visitRerankScoreAttribute(option));
768759
} else {
769760
throw new ParsingException(
770761
source(option.identifier()),
@@ -774,7 +765,7 @@ private Rerank.Builder visitRerankOptions(Rerank.Builder rerannkBuilder, EsqlBas
774765
}
775766
}
776767

777-
return rerannkBuilder;
768+
return rerankBuilder;
778769
}
779770

780771
@Override
@@ -789,6 +780,33 @@ public PlanFactory visitCompletionCommand(EsqlBaseParser.CompletionCommandContex
789780
return p -> new Completion(source, p, inferenceId, prompt, targetField);
790781
}
791782

783+
public UnresolvedAttribute visitRerankScoreAttribute(EsqlBaseParser.CommandOptionContext ctx) {
784+
if (ctx.primaryExpression() == null) {
785+
throw new ParsingException(source(ctx), "Parameter [{}] is null or undefined", ctx.identifier().getText());
786+
}
787+
788+
Expression optionValue = expression(ctx.primaryExpression());
789+
790+
if (optionValue instanceof UnresolvedAttribute scoreAttribute) {
791+
return scoreAttribute;
792+
} else if (optionValue instanceof Literal literal) {
793+
if (literal.value() == null) {
794+
throw new ParsingException(optionValue.source(), "Parameter [{}] is null or undefined", ctx.identifier().getText());
795+
}
796+
797+
if (literal.value() instanceof String attributeName) {
798+
return new UnresolvedAttribute(literal.source(), attributeName);
799+
}
800+
}
801+
802+
throw new ParsingException(
803+
source(ctx),
804+
"Option [{}] expects a valid attribute in RERANK command. [{}] provided.",
805+
ctx.identifier().getText(),
806+
ctx.primaryExpression().getText()
807+
);
808+
}
809+
792810
public Literal visitInferenceId(EsqlBaseParser.IdentifierOrParameterContext ctx) {
793811
if (ctx.identifier() != null) {
794812
return new Literal(source(ctx), visitIdentifier(ctx.identifier()), KEYWORD);
@@ -802,7 +820,7 @@ public Literal visitInferenceId(Expression expression) {
802820
if (literal.value() == null) {
803821
throw new ParsingException(
804822
expression.source(),
805-
"Query parameter [{}] is null or undefined and cannot be used as inference id",
823+
"Parameter [{}] is null or undefined and cannot be used as inference id",
806824
expression.source().text()
807825
);
808826
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/inference/Completion.java

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,7 @@
2222
import org.elasticsearch.xpack.esql.core.tree.Source;
2323
import org.elasticsearch.xpack.esql.core.type.DataType;
2424
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
25-
import org.elasticsearch.xpack.esql.plan.GeneratingPlan;
2625
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
27-
import org.elasticsearch.xpack.esql.plan.logical.SortAgnostic;
2826

2927
import java.io.IOException;
3028
import java.util.List;
@@ -34,12 +32,7 @@
3432
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT;
3533
import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes;
3634

37-
public class Completion extends InferencePlan<Completion>
38-
implements
39-
GeneratingPlan<Completion>,
40-
SortAgnostic,
41-
TelemetryAware,
42-
PostAnalysisVerificationAware {
35+
public class Completion extends InferencePlan<Completion> implements TelemetryAware, PostAnalysisVerificationAware {
4336

4437
public static final String DEFAULT_OUTPUT_FIELD_NAME = "completion";
4538

0 commit comments

Comments
 (0)