Skip to content

Commit 75ac0ee

Browse files
authored
[ES|QL] Rerank operator improvements (#132318)
1 parent 3bfb0e8 commit 75ac0ee

File tree

18 files changed

+489
-155
lines changed

18 files changed

+489
-155
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/books.csv

Lines changed: 80 additions & 80 deletions
Large diffs are not rendered by default.

x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-books.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@
2525
},
2626
"year": {
2727
"type": "integer"
28+
},
29+
"collection": {
30+
"type": "text"
2831
}
2932
}
3033
}

x-pack/plugin/esql/qa/testFixtures/src/main/resources/rerank.csv-spec

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,45 @@ book_no:keyword | title:text | author
6161
5327 | War and Peace | Leo Tolstoy | 0.08
6262
;
6363

64+
reranker using a non text fields
65+
required_capability: rerank
66+
required_capability: match_operator_colon
67+
68+
FROM books METADATA _score
69+
| WHERE title:"war and peace" AND author:"Tolstoy"
70+
| RERANK "war and peace" ON ratings WITH { "inference_id" : "test_reranker" }
71+
| EVAL _score=ROUND(_score, 2), ratings = ROUND(ratings, 2)
72+
| SORT _score DESC, book_no ASC
73+
| KEEP book_no, title, ratings, _score
74+
;
75+
76+
book_no:keyword | title:text | ratings:double | _score:double
77+
2776 | The Devil and Other Stories (Oxford World's Classics) | 5.0 | 0.33
78+
4536 | War and Peace (Signet Classics) | 4.75 | 0.25
79+
5327 | War and Peace | 3.84 | 0.06
80+
9032 | War and Peace: A Novel (6 Volumes) | 3.81 | 0.06
81+
;
82+
83+
84+
reranker using a sparse input field
85+
required_capability: rerank
86+
required_capability: match_operator_colon
87+
88+
FROM books METADATA _score
89+
| WHERE MATCH(title, "lord of the rings", {"minimum_should_match": "100%"}) AND author:"tolkien"
90+
| RERANK rerank_score="war and peace" ON collection WITH { "inference_id" : "test_reranker" }
91+
| EVAL _score=ROUND(_score, 2), rerank_score=ROUND(rerank_score, 2)
92+
| SORT rerank_score DESC NULLS LAST, _score DESC
93+
| LIMIT 3
94+
| KEEP book_no, title, author, collection, rerank_score, _score
95+
;
96+
97+
book_no:keyword | title:text | author:text | collection:text | rerank_score:double | _score:double
98+
2714 | Return of the King Being the Third Part of The Lord of the Rings | J. R. R. Tolkien | The Lord of the Rings | 0.05 | 8.56
99+
2675 | The Lord of the Rings - Boxed Set | J.R.R. Tolkien | The Lord of the Rings | 0.05 | 8.35
100+
7140 | The Lord of the Rings Poster Collection: Six Paintings by Alan Lee (No. 1) | [Alan Lee, J. R. R. Tolkien] | null | null | 5.5
101+
;
102+
64103

65104
reranker using multiple fields
66105
required_capability: rerank
@@ -82,6 +121,26 @@ book_no:keyword | title:text | author
82121
;
83122

84123

124+
reranker using multiple fields with some non text fields
125+
required_capability: rerank
126+
required_capability: match_operator_colon
127+
128+
FROM books METADATA _score
129+
| WHERE title:"war and peace" AND author:"Tolstoy"
130+
| RERANK "war and peace" ON title, ratings WITH { "inference_id" : "test_reranker" }
131+
| EVAL _score=ROUND(_score, 2), ratings = ROUND(ratings, 2)
132+
| SORT _score DESC, book_no ASC
133+
| KEEP book_no, title, ratings, _score
134+
;
135+
136+
book_no:keyword | title:text | ratings:double | _score:double
137+
4536 | War and Peace (Signet Classics) | 4.75 | 0.02
138+
5327 | War and Peace | 3.84 | 0.02
139+
2776 | The Devil and Other Stories (Oxford World's Classics) | 5.0 | 0.01
140+
9032 | War and Peace: A Novel (6 Volumes) | 3.81 | 0.01
141+
;
142+
143+
85144
reranker after a limit
86145
required_capability: rerank
87146
required_capability: match_operator_colon

x-pack/plugin/esql/qa/testFixtures/src/main/resources/scoring.csv-spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,8 +351,8 @@ from books metadata _score
351351

352352
book_no:keyword | c_score:double
353353
8875 | 3.0
354+
7480 | 1.0
354355
7350 | 1.0
355-
7140 | 3.0
356356
;
357357

358358
QstrScoreManipulation

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
8080
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
8181
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong;
82+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString;
8283
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToUnsignedLong;
8384
import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce;
8485
import org.elasticsearch.xpack.esql.expression.function.vector.VectorFunction;
@@ -813,11 +814,23 @@ private LogicalPlan resolveRerank(Rerank rerank, List<Attribute> childrenOutput)
813814
List<Alias> newFields = new ArrayList<>();
814815
boolean changed = false;
815816

817+
// No need to cast to string when there are multiple rerank fields, since they will be converted to YAML.
818+
boolean castRerankFieldsAsString = rerank.rerankFields().size() < 2;
819+
816820
// First resolving fields used in expression
817821
for (Alias field : rerank.rerankFields()) {
818-
Alias result = (Alias) field.transformUp(UnresolvedAttribute.class, ua -> resolveAttribute(ua, childrenOutput));
819-
newFields.add(result);
820-
changed |= result != field;
822+
Alias resolved = (Alias) field.transformUp(UnresolvedAttribute.class, ua -> resolveAttribute(ua, childrenOutput));
823+
824+
if (resolved.resolved()) {
825+
if (castRerankFieldsAsString
826+
&& rerank.isValidRerankField(resolved)
827+
&& DataType.isString(resolved.dataType()) == false) {
828+
resolved = resolved.replaceChild(new ToString(resolved.child().source(), resolved.child()));
829+
}
830+
}
831+
832+
newFields.add(resolved);
833+
changed |= resolved != field;
821834
}
822835

823836
if (changed) {

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/XContentRowEncoder.java

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,10 @@ public void close() {
7777
@Override
7878
public BytesRefBlock eval(Page page) {
7979
Block[] fieldValueBlocks = new Block[fieldsValueEvaluators.length];
80+
8081
try (
8182
BytesRefStreamOutput outputStream = new BytesRefStreamOutput();
82-
XContentBuilder xContentBuilder = XContentFactory.contentBuilder(xContentType, outputStream);
83-
BytesRefBlock.Builder outputBlockBuilder = blockFactory.newBytesRefBlockBuilder(page.getPositionCount());
83+
BytesRefBlock.Builder outputBlockBuilder = blockFactory.newBytesRefBlockBuilder(page.getPositionCount())
8484
) {
8585

8686
PositionToXContent[] toXContents = new PositionToXContent[fieldsValueEvaluators.length];
@@ -90,18 +90,28 @@ public BytesRefBlock eval(Page page) {
9090
}
9191

9292
for (int pos = 0; pos < page.getPositionCount(); pos++) {
93-
xContentBuilder.startObject();
94-
for (int i = 0; i < fieldValueBlocks.length; i++) {
95-
String fieldName = columnsInfo[i].name();
96-
Block currentBlock = fieldValueBlocks[i];
97-
if (currentBlock.isNull(pos) || currentBlock.getValueCount(pos) < 1) {
98-
continue;
93+
try (XContentBuilder xContentBuilder = XContentFactory.contentBuilder(xContentType, outputStream)) {
94+
95+
xContentBuilder.startObject();
96+
boolean hasNullsOnly = true;
97+
for (int i = 0; i < fieldValueBlocks.length; i++) {
98+
String fieldName = columnsInfo[i].name();
99+
Block currentBlock = fieldValueBlocks[i];
100+
if (currentBlock.isNull(pos) || currentBlock.getValueCount(pos) < 1) {
101+
continue;
102+
}
103+
hasNullsOnly = false;
104+
toXContents[i].positionToXContent(xContentBuilder.field(fieldName), ToXContent.EMPTY_PARAMS, pos);
105+
}
106+
xContentBuilder.endObject().flush();
107+
108+
if (hasNullsOnly) {
109+
outputBlockBuilder.appendNull();
110+
} else {
111+
outputBlockBuilder.appendBytesRef(outputStream.get());
112+
outputStream.reset();
99113
}
100-
toXContents[i].positionToXContent(xContentBuilder.field(fieldName), ToXContent.EMPTY_PARAMS, pos);
101114
}
102-
xContentBuilder.endObject().flush();
103-
outputBlockBuilder.appendBytesRef(outputStream.get());
104-
outputStream.reset();
105115
}
106116

107117
return outputBlockBuilder.build();
@@ -125,7 +135,7 @@ public static class Factory implements ExpressionEvaluator.Factory {
125135
private final XContentType xContentType;
126136
private final Map<ColumnInfoImpl, ExpressionEvaluator.Factory> fieldsEvaluatorFactories;
127137

128-
private Factory(XContentType xContentType, Map<ColumnInfoImpl, ExpressionEvaluator.Factory> fieldsEvaluatorFactories) {
138+
Factory(XContentType xContentType, Map<ColumnInfoImpl, ExpressionEvaluator.Factory> fieldsEvaluatorFactories) {
129139
this.xContentType = xContentType;
130140
this.fieldsEvaluatorFactories = fieldsEvaluatorFactories;
131141
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/completion/CompletionOperatorRequestIterator.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ public InferenceAction.Request next() {
5151
if (hasNext() == false) {
5252
throw new NoSuchElementException();
5353
}
54+
5455
return inferenceRequest(promptReader.readPrompt(currentPos++));
5556
}
5657

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/rerank/RerankOperatorOutputBuilder.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,16 @@ public Page buildOutput() {
8080
*/
8181
@Override
8282
public void addInferenceResponse(InferenceAction.Response inferenceResponse) {
83+
if (inferenceResponse == null) {
84+
scoreBlockBuilder.appendNull();
85+
return;
86+
}
87+
8388
Iterator<RankedDocsResults.RankedDoc> sortedRankedDocIterator = inferenceResults(inferenceResponse).getRankedDocs()
8489
.stream()
8590
.sorted(Comparator.comparingInt(RankedDocsResults.RankedDoc::index))
8691
.iterator();
92+
8793
while (sortedRankedDocIterator.hasNext()) {
8894
scoreBlockBuilder.appendDouble(sortedRankedDocIterator.next().relevanceScore());
8995
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/rerank/RerankOperatorRequestIterator.java

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,22 +51,28 @@ public InferenceAction.Request next() {
5151
throw new NoSuchElementException();
5252
}
5353

54-
final int inputSize = Math.min(remainingPositions, batchSize);
55-
final List<String> inputs = new ArrayList<>(inputSize);
54+
final int maxInputSize = Math.min(remainingPositions, batchSize);
55+
final List<String> inputs = new ArrayList<>(maxInputSize);
5656
BytesRef scratch = new BytesRef();
5757

5858
int startIndex = inputBlock.getPositionCount() - remainingPositions;
59-
for (int i = 0; i < inputSize; i++) {
59+
60+
if (inputBlock.isNull(startIndex)) {
61+
remainingPositions -= 1;
62+
return null;
63+
}
64+
65+
for (int i = 0; i < maxInputSize; i++) {
6066
int pos = startIndex + i;
6167
if (inputBlock.isNull(pos)) {
62-
inputs.add("");
68+
break;
6369
} else {
6470
scratch = inputBlock.getBytesRef(inputBlock.getFirstValueIndex(pos), scratch);
6571
inputs.add(BytesRefs.toString(scratch));
6672
}
6773
}
6874

69-
remainingPositions -= inputSize;
75+
remainingPositions -= inputs.size();
7076
return inferenceRequest(inputs);
7177
}
7278

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -758,18 +758,6 @@ public PlanFactory visitRerankCommand(EsqlBaseParser.RerankCommandContext ctx) {
758758
Expression queryText = expression(ctx.queryText);
759759
Attribute scoreAttribute = visitQualifiedName(ctx.targetField, new UnresolvedAttribute(source, MetadataAttribute.SCORE));
760760

761-
if (queryText instanceof Literal queryTextLiteral && DataType.isString(queryText.dataType())) {
762-
if (queryTextLiteral.value() == null) {
763-
throw new ParsingException(source(ctx.queryText), "Query cannot be null or undefined in RERANK", ctx.queryText.getText());
764-
}
765-
} else {
766-
throw new ParsingException(
767-
source(ctx.queryText),
768-
"Query must be a valid string in RERANK, found [{}]",
769-
ctx.queryText.getText()
770-
);
771-
}
772-
773761
return p -> {
774762
checkForRemoteClusters(p, source, "RERANK");
775763
return applyRerankOptions(new Rerank(source, p, queryText, rerankFields, scoreAttribute), ctx.commandNamedParameters());

0 commit comments

Comments
 (0)