elastic · ioanatia · Mar 11, 2025 · Jan 30, 2025 · Feb 24, 2025 · Feb 25, 2025
diff --git a/docs/changelog/123396.yaml b/docs/changelog/123396.yaml
@@ -0,0 +1,5 @@
+pr: 123396
+summary: Add initial grammar and planning for RRF (snapshot)
+area: ES|QL
+type: feature
+issues: []
diff --git a/...n/esql/compute/src/main/java/org/elasticsearch/compute/operator/RrfScoreEvalOperator.java b/...n/esql/compute/src/main/java/org/elasticsearch/compute/operator/RrfScoreEvalOperator.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.operator;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.DoubleVector;
+import org.elasticsearch.compute.data.Page;
+
+import java.util.HashMap;
+
+/**
+ * Replaces the score column of each page with scores computed by the Reciprocal Rank Fusion (RRF) formula.
+ * <p>
+ * The operator receives the positions of the score and fork discriminator columns.
+ * The new score assigned to each row is {@code 1 / (rank_constant + row_number)}, where {@code rank_constant}
+ * is currently fixed to {@code 60} and {@code row_number} is the 1-based number of rows seen so far with the
+ * same fork discriminator value. The per-fork counters live on the operator instance, so numbering continues
+ * across successive pages instead of restarting on every page.
+ */
+public class RrfScoreEvalOperator extends AbstractPageMappingOperator {
+
+    /**
+     * Creates {@link RrfScoreEvalOperator} instances.
+     *
+     * @param forkPosition  index of the fork discriminator block in incoming pages
+     * @param scorePosition index of the score block that gets replaced
+     */
+    public record Factory(int forkPosition, int scorePosition) implements OperatorFactory {
+        @Override
+        public Operator get(DriverContext driverContext) {
+            return new RrfScoreEvalOperator(forkPosition, scorePosition);
+        }
+
+        @Override
+        public String describe() {
+            return "RrfScoreEvalOperator";
+        }
+
+    }
+
+    /** The {@code rank_constant} term of the RRF formula. */
+    private static final int RANK_CONSTANT = 60;
+
+    private final int scorePosition;
+    private final int forkPosition;
+
+    /** Number of rows seen so far for each fork discriminator value. */
+    private final HashMap<String, Integer> counters = new HashMap<>();
+
+    /**
+     * @param forkPosition  index of the fork discriminator block in incoming pages
+     * @param scorePosition index of the score block that gets replaced
+     */
+    public RrfScoreEvalOperator(int forkPosition, int scorePosition) {
+        this.scorePosition = scorePosition;
+        this.forkPosition = forkPosition;
+    }
+
+    /**
+     * Builds a new score block for {@code page} and returns a page with the same number of blocks in which
+     * the block at {@code scorePosition} is the new one; every other block keeps its index.
+     */
+    @Override
+    protected Page process(Page page) {
+        BytesRefBlock forkBlock = (BytesRefBlock) page.getBlock(forkPosition);
+        int positionCount = page.getPositionCount();
+
+        Block scoreBlock;
+        // Close the builder on every path so that a failure while appending does not leave it unreleased.
+        try (DoubleVector.Builder scores = forkBlock.blockFactory().newDoubleVectorBuilder(positionCount)) {
+            // One scratch instance is enough: its content is copied into a String before the next read.
+            BytesRef scratch = new BytesRef();
+            for (int i = 0; i < positionCount; i++) {
+                // NOTE(review): i is passed as the value index, which presumably equals the position only when
+                // the fork column has no nulls or multi-values - confirm the planner guarantees that here.
+                String fork = forkBlock.getBytesRef(i, scratch).utf8ToString();
+
+                // merge stores and returns the 1-based row number of this row within its fork.
+                int rank = counters.merge(fork, 1, Integer::sum);
+                scores.appendDouble(1.0 / (RANK_CONSTANT + rank));
+            }
+            scoreBlock = scores.build().asBlock();
+        }
+
+        // Append the new scores as the last block, then project it into the slot of the old score block.
+        Page withScores = page.appendBlock(scoreBlock);
+        int newScoreIndex = withScores.getBlockCount() - 1;
+
+        int[] projections = new int[newScoreIndex];
+        for (int i = 0; i < newScoreIndex; i++) {
+            projections[i] = i == scorePosition ? newScoreIndex : i;
+        }
+
+        // NOTE(review): the replaced score block is not released here - confirm who owns the input blocks
+        // after projectBlocks.
+        return withScores.projectBlocks(projections);
+    }
+
+    @Override
+    public String toString() {
+        return "RrfScoreEvalOperator";
+    }
+}
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rrf.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rrf.csv-spec
@@ -0,0 +1,111 @@
+//
+// CSV spec for RRF command
+//
+
+// Two FORK branches that return one employee each. Every row is the first of its branch,
+// so both expected scores are 1 / (60 + 1), which rounds to 0.0164.
+simpleRrf
+required_capability: fork
+required_capability: rrf
+required_capability: match_operator_colon
+
+FROM employees METADATA _id, _index, _score
+| FORK ( WHERE emp_no:10001 )
+       ( WHERE emp_no:10002 )
+| RRF
+| EVAL _score = round(_score, 4)
+| KEEP _score, _fork, emp_no
+| SORT _score, _fork, emp_no
+;
+
+_score:double | _fork:keyword | emp_no:integer
+0.0164        | fork1         | 10001
+0.0164        | fork2         | 10002
+;
+
+// Two full-text branches limited to three rows each. _id 4 is reported for both branches and its
+// expected score 0.03279 equals 2 / 61 (rank 1 in each branch); the remaining rows belong to a
+// single branch and carry 1 / 62 (0.01613) or 1 / 63 (0.01587).
+rrfWithMatchAndScore
+required_capability: fork
+required_capability: rrf
+required_capability: match_operator_colon
+
+FROM books METADATA _id, _index, _score
+| FORK ( WHERE title:"Tolkien" | SORT _score, _id DESC | LIMIT 3 )
+       ( WHERE author:"Tolkien" | SORT _score, _id DESC | LIMIT 3 )
+| RRF
+| EVAL _fork = mv_sort(_fork)
+| EVAL _score = round(_score, 5)
+| KEEP _score, _fork, _id
+;
+
+_score:double | _fork:keyword  | _id:keyword
+0.03279       | [fork1, fork2] | 4
+0.01613       | fork1          | 56
+0.01613       | fork2          | 60
+0.01587       | fork2          | 1
+0.01587       | fork1          | 26
+;
+
+// Applies a WHERE on the fused _score after RRF. Expected scores: 0.03252 = 1 / 61 + 1 / 62,
+// 0.032 = 1 / 62 + 1 / 63, 0.01639 = 1 / 61 and 0.01587 = 1 / 63.
+// With LIMIT 3 per branch the lowest possible score is 1 / 63, so every row passes the 0.014 threshold.
+rrfWithDisjunctionAndPostFilter
+required_capability: fork
+required_capability: rrf
+required_capability: match_operator_colon
+
+FROM books METADATA _id, _index, _score
+| FORK ( WHERE title:"Tolkien" OR author:"Tolkien" | SORT _score, _id DESC | LIMIT 3 )
+       ( WHERE author:"Tolkien" | SORT _score, _id DESC | LIMIT 3 )
+| RRF
+| EVAL _fork = mv_sort(_fork)
+| EVAL _score = round(_score, 5)
+| KEEP _score, _fork, _id
+| WHERE _score > 0.014
+;
+
+_score:double | _fork:keyword  | _id:keyword
+0.03252       | [fork1, fork2] | 60
+0.032         | [fork1, fork2] | 1
+0.01639       | fork2          | 4
+0.01587       | fork1          | 40
+;
+
+// Feeds the RRF output into STATS and counts the rows per _fork value.
+// The third branch is expected to contribute a single row, the other two three rows each.
+rrfWithStats
+required_capability: fork
+required_capability: rrf
+required_capability: match_operator_colon
+
+FROM books METADATA _id, _index, _score
+| FORK ( WHERE title:"Tolkien" | SORT _score, _id DESC | LIMIT 3 )
+       ( WHERE author:"Tolkien" | SORT _score, _id DESC | LIMIT 3 )
+       ( WHERE author:"Ursula K. Le Guin" AND title:"short stories" | SORT _score, _id DESC | LIMIT 3)
+| RRF
+| STATS count_fork=COUNT(*) BY _fork
+;
+
+count_fork:long | _fork:keyword
+3               | fork1
+3               | fork2
+1               | fork3
+;
+
+// RRF over four FORK branches that mix the match operator, qstr() and match().
+// The first expected row is reported for fork1 and fork4 and its score 0.0328 equals 2 / 61 rounded to
+// four decimals; the other rows belong to one branch each (1 / 61, 1 / 62 or 1 / 63).
+rrfWithMultipleForkBranches
+required_capability: fork
+required_capability: rrf
+required_capability: match_operator_colon
+
+FROM books METADATA _id, _index, _score
+| FORK (WHERE author:"Keith Faulkner" AND qstr("author:Rory or author:Beverlie") | SORT _score, _id DESC | LIMIT 3)
+       (WHERE author:"Ursula K. Le Guin" | SORT _score, _id DESC | LIMIT 3)
+       (WHERE title:"Tolkien" AND author:"Tolkien" AND year > 2000 AND mv_count(author) == 1 | SORT _score, _id DESC | LIMIT 3)
+       (WHERE match(author, "Keith Faulkner") AND match(author, "Rory Tyger") | SORT _score, _id DESC | LIMIT 3)
+| RRF
+| EVAL _fork = mv_sort(_fork)
+| EVAL _score = round(_score, 4)
+| EVAL title = trim(substring(title, 1, 20))
+| KEEP _score, author, title, _fork
+;
+
+_score:double | author:keyword                    | title:keyword        | _fork:keyword
+0.0328        | [Keith Faulkner, Rory Tyger]      | Pop! Went Another Ba | [fork1, fork4]
+0.0164        | J.R.R. Tolkien                    | Letters of J R R Tol | fork3
+0.0164        | Ursula K. Le Guin                 | The wind's twelve qu | fork2
+0.0161        | [Beverlie Manson, Keith Faulkner] | Rainbow's End: A Mag | fork1
+0.0161        | Ursula K. Le Guin                 | The Word For World i | fork2
+0.0159        | Ursula K. Le Guin                 | The Dispossessed     | fork2
+;
diff --git a/.../plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/ForkIT.java b/.../plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/ForkIT.java
@@ -350,6 +350,32 @@ public void testScoringKeepAndSort() {
         }
     }
 
+    public void testRrf() {
+        assumeTrue("requires RRF capability", EsqlCapabilities.Cap.RRF.isEnabled());
+
+        var query = """
+            FROM test METADATA _score, _id, _index
+            | WHERE id > 2
+            | FORK
+               ( WHERE content:"fox" | SORT _score, _id DESC )
+               ( WHERE content:"dog" | SORT _score, _id DESC )
+            | RRF
+            | EVAL _score = round(_score, 4)
+            | KEEP id, content, _score, _fork
+            """;
+        try (var resp = run(query)) {
+            assertColumnNames(resp.columns(), List.of("id", "content", "_score", "_fork"));
+            assertColumnTypes(resp.columns(), List.of("integer", "keyword", "double", "keyword"));
+            assertThat(getValuesList(resp.values()).size(), equalTo(3));
+            Iterable<Iterable<Object>> expectedValues = List.of(
+                List.of(6, "The quick brown fox jumps over the lazy dog", 0.0325, List.of("fork1", "fork2")),
+                List.of(4, "The dog is brown but this document is very very long", 0.0164, "fork2"),
+                List.of(3, "This dog is really brown", 0.0159, "fork2")
+            );
+            assertValues(resp.values(), expectedValues);
+        }
+    }
+
     public void testThreeSubQueries() {
         var query = """
             FROM test

diff --git a/x-pack/plugin/esql/src/main/antlr/EsqlBaseLexer.g4 b/x-pack/plugin/esql/src/main/antlr/EsqlBaseLexer.g4
@@ -65,6 +65,7 @@ import ChangePoint,
        Metrics,
        MvExpand,
        Project,
+       Rrf,
        Rename,
        Show,
        UnknownCommand;