Skip to content

Commit be9d4e9

Browse files
authored
ES|QL: Add support for RRF options (#134227)
1 parent 70b7903 commit be9d4e9

File tree

27 files changed

+2637
-2166
lines changed

27 files changed

+2637
-2166
lines changed

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/RrfScoreEvalOperator.java

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.elasticsearch.compute.data.Page;
1515

1616
import java.util.HashMap;
17+
import java.util.Map;
1718

1819
/**
1920
* Updates the score column with new scores using the RRF formula.
@@ -23,10 +24,12 @@
2324
*/
2425
public class RrfScoreEvalOperator extends AbstractPageMappingOperator {
2526

26-
public record Factory(int forkPosition, int scorePosition) implements OperatorFactory {
27+
public record Factory(int forkPosition, int scorePosition, double rankConstant, Map<String, Double> weights)
28+
implements
29+
OperatorFactory {
2730
@Override
2831
public Operator get(DriverContext driverContext) {
29-
return new RrfScoreEvalOperator(forkPosition, scorePosition);
32+
return new RrfScoreEvalOperator(forkPosition, scorePosition, rankConstant, weights);
3033
}
3134

3235
@Override
@@ -38,26 +41,33 @@ public String describe() {
3841

3942
private final int scorePosition;
4043
private final int forkPosition;
44+
private final double rankConstant;
45+
private final Map<String, Double> weights;
4146

4247
private HashMap<String, Integer> counters = new HashMap<>();
4348

44-
public RrfScoreEvalOperator(int forkPosition, int scorePosition) {
49+
public RrfScoreEvalOperator(int forkPosition, int scorePosition, double rankConstant, Map<String, Double> weights) {
4550
this.scorePosition = scorePosition;
4651
this.forkPosition = forkPosition;
52+
this.rankConstant = rankConstant;
53+
this.weights = weights;
4754
}
4855

4956
@Override
5057
protected Page process(Page page) {
51-
BytesRefBlock forkBlock = (BytesRefBlock) page.getBlock(forkPosition);
58+
BytesRefBlock discriminatorBlock = (BytesRefBlock) page.getBlock(forkPosition);
5259

53-
DoubleVector.Builder scores = forkBlock.blockFactory().newDoubleVectorBuilder(forkBlock.getPositionCount());
60+
DoubleVector.Builder scores = discriminatorBlock.blockFactory().newDoubleVectorBuilder(discriminatorBlock.getPositionCount());
5461

5562
for (int i = 0; i < page.getPositionCount(); i++) {
56-
String fork = forkBlock.getBytesRef(i, new BytesRef()).utf8ToString();
63+
String discriminator = discriminatorBlock.getBytesRef(i, new BytesRef()).utf8ToString();
5764

58-
int rank = counters.getOrDefault(fork, 1);
59-
counters.put(fork, rank + 1);
60-
scores.appendDouble(1.0 / (60 + rank));
65+
int rank = counters.getOrDefault(discriminator, 1);
66+
counters.put(discriminator, rank + 1);
67+
68+
var weight = weights.getOrDefault(discriminator, 1.0);
69+
70+
scores.appendDouble(1.0 / (this.rankConstant + rank) * weight);
6171
}
6272

6373
Block scoreBlock = scores.build().asBlock();

x-pack/plugin/esql/qa/testFixtures/src/main/resources/fuse.csv-spec

Lines changed: 102 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
simpleFuse
66
required_capability: fork_v9
7-
required_capability: fuse
7+
required_capability: fuse_v2
88
required_capability: match_operator_colon
99

1010
FROM employees METADATA _id, _index, _score
@@ -23,7 +23,7 @@ _score:double | _fork:keyword | emp_no:integer
2323

2424
fuseWithMatchAndScore
2525
required_capability: fork_v9
26-
required_capability: fuse
26+
required_capability: fuse_v2
2727
required_capability: match_operator_colon
2828

2929
FROM books METADATA _id, _index, _score
@@ -46,7 +46,7 @@ _score:double | _fork:keyword | _id:keyword
4646

4747
fuseWithDisjunctionAndPostFilter
4848
required_capability: fork_v9
49-
required_capability: fuse
49+
required_capability: fuse_v2
5050
required_capability: match_operator_colon
5151

5252
FROM books METADATA _id, _index, _score
@@ -69,7 +69,7 @@ _score:double | _fork:keyword | _id:keyword
6969

7070
fuseWithStats
7171
required_capability: fork_v9
72-
required_capability: fuse
72+
required_capability: fuse_v2
7373
required_capability: match_operator_colon
7474

7575
FROM books METADATA _id, _index, _score
@@ -89,7 +89,7 @@ count_fork:long | _fork:keyword
8989

9090
fuseWithMultipleForkBranches
9191
required_capability: fork_v9
92-
required_capability: fuse
92+
required_capability: fuse_v2
9393
required_capability: match_operator_colon
9494

9595
FROM books METADATA _id, _index, _score
@@ -116,7 +116,7 @@ _score:double | author:keyword | title:keyword | _fork
116116

117117
fuseWithSemanticSearch
118118
required_capability: fork_v9
119-
required_capability: fuse
119+
required_capability: fuse_v2
120120
required_capability: semantic_text_field_caps
121121
required_capability: metadata_score
122122

@@ -134,3 +134,99 @@ _fork:keyword | _score:double | _id:keyword | semantic_text_field:keyword
134134
[fork1, fork2] | 0.0328 | 2 | all we have to decide is what to do with the time that is given to us
135135
[fork1, fork2] | 0.0323 | 3 | be excellent to each other
136136
;
137+
138+
fuseWithSimpleRrf
139+
required_capability: fork_v9
140+
required_capability: fuse_v2
141+
required_capability: semantic_text_field_caps
142+
required_capability: metadata_score
143+
144+
FROM books METADATA _id, _index, _score
145+
| FORK ( WHERE title:"Tolkien" | SORT _score, _id DESC | LIMIT 3 )
146+
( WHERE author:"Tolkien" | SORT _score, _id DESC | LIMIT 3 )
147+
| FUSE rrf
148+
| SORT _score DESC, _id, _index
149+
| EVAL _fork = mv_sort(_fork)
150+
| EVAL _score = round(_score, 5)
151+
| KEEP _score, _fork, _id
152+
;
153+
154+
_score:double | _fork:keyword | _id:keyword
155+
0.03279 | [fork1, fork2] | 4
156+
0.01613 | fork1 | 56
157+
0.01613 | fork2 | 60
158+
0.01587 | fork2 | 1
159+
0.01587 | fork1 | 26
160+
;
161+
162+
fuseWithRrfAndRankConstant
163+
required_capability: fork_v9
164+
required_capability: fuse_v2
165+
required_capability: semantic_text_field_caps
166+
required_capability: metadata_score
167+
168+
FROM books METADATA _id, _index, _score
169+
| FORK ( WHERE title:"Tolkien" | SORT _score, _id DESC | LIMIT 3 )
170+
( WHERE author:"Tolkien" | SORT _score, _id DESC | LIMIT 3 )
171+
| FUSE rrf WITH {"rank_constant": 50 }
172+
| SORT _score DESC, _id, _index
173+
| EVAL _fork = mv_sort(_fork)
174+
| EVAL _score = round(_score, 5)
175+
| KEEP _score, _fork, _id
176+
;
177+
178+
_score:double | _fork:keyword | _id:keyword
179+
0.03922 | [fork1, fork2] | 4
180+
0.01923 | fork1 | 56
181+
0.01923 | fork2 | 60
182+
0.01887 | fork2 | 1
183+
0.01887 | fork1 | 26
184+
;
185+
186+
fuseWithRrfAndWeights
187+
required_capability: fork_v9
188+
required_capability: fuse_v2
189+
required_capability: semantic_text_field_caps
190+
required_capability: metadata_score
191+
192+
FROM books METADATA _id, _index, _score
193+
| FORK ( WHERE title:"Tolkien" | SORT _score, _id DESC | LIMIT 3 )
194+
( WHERE author:"Tolkien" | SORT _score, _id DESC | LIMIT 3 )
195+
| FUSE rrf WITH {"weights": { "fork1": 0.3, "fork2": 0.7 } }
196+
| SORT _score DESC, _id, _index
197+
| EVAL _fork = mv_sort(_fork)
198+
| EVAL _score = round(_score, 5)
199+
| KEEP _score, _fork, _id
200+
;
201+
202+
_score:double | _fork:keyword | _id:keyword
203+
0.01639 | [fork1, fork2] | 4
204+
0.01129 | fork2 | 60
205+
0.01111 | fork2 | 1
206+
0.00484 | fork1 | 56
207+
0.00476 | fork1 | 26
208+
;
209+
210+
fuseWithRrfRankConstantAndWeights
211+
required_capability: fork_v9
212+
required_capability: fuse_v2
213+
required_capability: semantic_text_field_caps
214+
required_capability: metadata_score
215+
216+
FROM books METADATA _id, _score, _index
217+
| FORK ( WHERE title:"Tolkien" | SORT _score, _id DESC | LIMIT 3 )
218+
( WHERE author:"Tolkien" | SORT _score, _id DESC | LIMIT 3)
219+
| FUSE rrf WITH {"rank_constant": 60, "weights": { "fork1": 0.3, "fork2": 0.7 } }
220+
| SORT _score DESC, _id, _index
221+
| EVAL _fork = mv_sort(_fork)
222+
| EVAL _score = round(_score, 5)
223+
| KEEP _score, _fork, _id
224+
;
225+
226+
_score:double | _fork:keyword | _id:keyword
227+
0.01639 | [fork1, fork2] | 4
228+
0.01129 | fork2 | 60
229+
0.01111 | fork2 | 1
230+
0.00484 | fork1 | 56
231+
0.00476 | fork1 | 26
232+
;

x-pack/plugin/esql/qa/testFixtures/src/main/resources/rerank.csv-spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ book_no:keyword | title:text | author
204204

205205
reranker after FUSE
206206
required_capability: fork_v9
207-
required_capability: fuse
207+
required_capability: fuse_v2
208208
required_capability: match_operator_colon
209209
required_capability: rerank
210210

x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/FuseIT.java

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
2828

2929
@Before
3030
public void setupIndex() {
31-
assumeTrue("requires FUSE capability", EsqlCapabilities.Cap.FUSE.isEnabled());
31+
assumeTrue("requires FUSE capability", EsqlCapabilities.Cap.FUSE_V2.isEnabled());
3232
createAndPopulateIndex();
3333
}
3434

@@ -58,6 +58,58 @@ public void testFuseWithRrf() throws Exception {
5858
}
5959
}
6060

61+
public void testFuseRrfWithWeights() {
62+
var query = """
63+
FROM test METADATA _score, _id, _index
64+
| WHERE id > 2
65+
| FORK
66+
( WHERE content:"fox" | SORT _score, _id DESC )
67+
( WHERE content:"dog" | SORT _score, _id DESC )
68+
| FUSE RRF WITH {"weights": { "fork1": 0.4, "fork2": 0.6}}
69+
| SORT _score DESC, _id, _index
70+
| EVAL _fork = mv_sort(_fork)
71+
| EVAL _score = round(_score, 4)
72+
| KEEP id, content, _score, _fork
73+
""";
74+
try (var resp = run(query)) {
75+
assertColumnNames(resp.columns(), List.of("id", "content", "_score", "_fork"));
76+
assertColumnTypes(resp.columns(), List.of("integer", "keyword", "double", "keyword"));
77+
assertThat(getValuesList(resp.values()).size(), equalTo(3));
78+
Iterable<Iterable<Object>> expectedValues = List.of(
79+
List.of(6, "The quick brown fox jumps over the lazy dog", 0.0162, List.of("fork1", "fork2")),
80+
List.of(4, "The dog is brown but this document is very very long", 0.0098, "fork2"),
81+
List.of(3, "This dog is really brown", 0.0095, "fork2")
82+
);
83+
assertValues(resp.values(), expectedValues);
84+
}
85+
}
86+
87+
public void testFuseRrfWithWeightsAndRankConstant() {
88+
var query = """
89+
FROM test METADATA _score, _id, _index
90+
| WHERE id > 2
91+
| FORK
92+
( WHERE content:"fox" | SORT _score, _id DESC )
93+
( WHERE content:"dog" | SORT _score, _id DESC )
94+
| FUSE RRF WITH {"weights": { "fork1": 0.4, "fork2": 0.6}, "rank_constant": 55 }
95+
| SORT _score DESC, _id, _index
96+
| EVAL _fork = mv_sort(_fork)
97+
| EVAL _score = round(_score, 4)
98+
| KEEP id, content, _score, _fork
99+
""";
100+
try (var resp = run(query)) {
101+
assertColumnNames(resp.columns(), List.of("id", "content", "_score", "_fork"));
102+
assertColumnTypes(resp.columns(), List.of("integer", "keyword", "double", "keyword"));
103+
assertThat(getValuesList(resp.values()).size(), equalTo(3));
104+
Iterable<Iterable<Object>> expectedValues = List.of(
105+
List.of(6, "The quick brown fox jumps over the lazy dog", 0.0177, List.of("fork1", "fork2")),
106+
List.of(4, "The dog is brown but this document is very very long", 0.0107, "fork2"),
107+
List.of(3, "This dog is really brown", 0.0103, "fork2")
108+
);
109+
assertValues(resp.values(), expectedValues);
110+
}
111+
}
112+
61113
private void createAndPopulateIndex() {
62114
var indexName = "test";
63115
var client = client().admin().indices();

x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/FuseWithInvalidLicenseIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
3131

3232
@Before
3333
public void setupIndex() {
34-
assumeTrue("requires FUSE capability", EsqlCapabilities.Cap.FUSE.isEnabled());
34+
assumeTrue("requires FUSE capability", EsqlCapabilities.Cap.FUSE_V2.isEnabled());
3535
var indexName = "test";
3636
var client = client().admin().indices();
3737
var CreateRequest = client.prepareCreate(indexName)

x-pack/plugin/esql/src/main/antlr/EsqlBaseLexer.tokens

Lines changed: 36 additions & 33 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/src/main/antlr/EsqlBaseParser.g4

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ insistCommand
336336
;
337337

338338
fuseCommand
339-
: DEV_FUSE
339+
: DEV_FUSE (fuseType=identifier)? fuseOptions=commandNamedParameters
340340
;
341341

342342
setCommand

0 commit comments

Comments
 (0)