elastic · pmpailis · Oct 3, 2024 · Oct 3, 2024 · Oct 3, 2024
diff --git a/docs/reference/search/rrf.asciidoc b/docs/reference/search/rrf.asciidoc
@@ -300,13 +300,12 @@ We have both the ranker's `score` and the `_rank` option to show our top-ranked
             "value" : 5,
             "relation" : "eq"
         },
-        "max_score" : null,
+        "max_score" : ...,
         "hits" : [
             {
                 "_index" : "example-index",
                 "_id" : "3",
                 "_score" : 0.8333334,
-                "_rank" : 1,
                 "_source" : {
                     "integer" : 1,
                     "vector" : [
@@ -319,7 +318,6 @@ We have both the ranker's `score` and the `_rank` option to show our top-ranked
                 "_index" : "example-index",
                 "_id" : "2",
                 "_score" : 0.5833334,
-                "_rank" : 2,
                 "_source" : {
                     "integer" : 2,
                     "vector" : [
@@ -332,7 +330,6 @@ We have both the ranker's `score` and the `_rank` option to show our top-ranked
                 "_index" : "example-index",
                 "_id" : "4",
                 "_score" : 0.5,
-                "_rank" : 3,
                 "_source" : {
                     "integer" : 2,
                     "text" : "rrf rrf rrf rrf"
@@ -499,7 +496,6 @@ Working with the example above, and by adding `explain=true` to the search reque
             "_index": "example-index",
             "_id": "3",
             "_score": 0.8333334,
-            "_rank": 1,
             "_explanation":
             {
                 "value": 0.8333334,                                                                                                                                               <1>
@@ -608,7 +604,6 @@ The response would now include the named query in the explanation:
             "_index": "example-index",
             "_id": "3",
             "_score": 0.8333334,
-            "_rank": 1,
             "_explanation":
             {
                 "value": 0.8333334,

diff --git a/...nternalClusterTest/java/org/elasticsearch/search/retriever/RankDocRetrieverBuilderIT.java b/...nternalClusterTest/java/org/elasticsearch/search/retriever/RankDocRetrieverBuilderIT.java
@@ -36,6 +36,7 @@
 import org.elasticsearch.search.sort.FieldSortBuilder;
 import org.elasticsearch.search.sort.NestedSortBuilder;
 import org.elasticsearch.search.sort.ScoreSortBuilder;
+import org.elasticsearch.search.sort.ShardDocSortField;
 import org.elasticsearch.search.sort.SortBuilder;
 import org.elasticsearch.search.sort.SortOrder;
 import org.elasticsearch.test.ESIntegTestCase;
@@ -189,8 +190,10 @@ public void testRankDocsRetrieverBasicWithPagination() {
         SearchSourceBuilder source = new SearchSourceBuilder();
         StandardRetrieverBuilder standard0 = new StandardRetrieverBuilder();
         // this one retrieves docs 1, 4, and 6
-        standard0.queryBuilder = QueryBuilders.constantScoreQuery(QueryBuilders.queryStringQuery("quick").defaultField(TEXT_FIELD))
-            .boost(10L);
+        standard0.queryBuilder = QueryBuilders.boolQuery()
+            .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_1")).boost(10L))
+            .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_4")).boost(9L))
+            .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(8L));
         StandardRetrieverBuilder standard1 = new StandardRetrieverBuilder();
         // this one retrieves docs 2 and 6 due to prefilter
         standard1.queryBuilder = QueryBuilders.constantScoreQuery(QueryBuilders.termsQuery(ID_FIELD, "doc_2", "doc_3", "doc_6")).boost(20L);
@@ -205,8 +208,8 @@ public void testRankDocsRetrieverBasicWithPagination() {
             null
         );
         // the compound retriever here produces a score for a doc based on the percentage of the queries that it was matched on and
-        // resolves ties based on actual score, rank, and then the doc (we're forcing 1 shard for consistent results)
-        // so ideal rank would be: 6, 2, 1, 4, 7, 3 and with pagination, we'd just omit the first result
+        // resolves ties based on the actual score and then on the doc (we're forcing 1 shard for consistent results),
+        // so the ideal rank would be: 6, 2, 1, 3, 4, 7, and with pagination we just omit the first result
         source.retriever(
             new CompoundRetrieverWithRankDocs(
                 rankWindowSize,
@@ -227,9 +230,9 @@ public void testRankDocsRetrieverBasicWithPagination() {
             assertThat(resp.getHits().getTotalHits().relation, equalTo(TotalHits.Relation.EQUAL_TO));
             assertThat(resp.getHits().getAt(0).getId(), equalTo("doc_2"));
             assertThat(resp.getHits().getAt(1).getId(), equalTo("doc_1"));
-            assertThat(resp.getHits().getAt(2).getId(), equalTo("doc_4"));
-            assertThat(resp.getHits().getAt(3).getId(), equalTo("doc_7"));
-            assertThat(resp.getHits().getAt(4).getId(), equalTo("doc_3"));
+            assertThat(resp.getHits().getAt(2).getId(), equalTo("doc_3"));
+            assertThat(resp.getHits().getAt(3).getId(), equalTo("doc_4"));
+            assertThat(resp.getHits().getAt(4).getId(), equalTo("doc_7"));
         });
     }
 
@@ -242,8 +245,10 @@ public void testRankDocsRetrieverWithAggs() {
         SearchSourceBuilder source = new SearchSourceBuilder();
         StandardRetrieverBuilder standard0 = new StandardRetrieverBuilder();
         // this one retrieves docs 1, 4, and 6
-        standard0.queryBuilder = QueryBuilders.constantScoreQuery(QueryBuilders.queryStringQuery("quick").defaultField(TEXT_FIELD))
-            .boost(10L);
+        standard0.queryBuilder = QueryBuilders.boolQuery()
+            .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_1")).boost(10L))
+            .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_4")).boost(9L))
+            .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(8L));
         StandardRetrieverBuilder standard1 = new StandardRetrieverBuilder();
         // this one retrieves docs 2 and 6 due to prefilter
         standard1.queryBuilder = QueryBuilders.constantScoreQuery(QueryBuilders.termsQuery(ID_FIELD, "doc_2", "doc_3", "doc_6")).boost(20L);
@@ -267,13 +272,15 @@ public void testRankDocsRetrieverWithAggs() {
                 )
             )
         );
+        source.size(1);
         source.aggregation(new TermsAggregationBuilder("topic").field(TOPIC_FIELD));
         SearchRequestBuilder req = client().prepareSearch(INDEX).setSource(source);
         ElasticsearchAssertions.assertResponse(req, resp -> {
             assertNull(resp.pointInTimeId());
             assertNotNull(resp.getHits().getTotalHits());
-            assertThat(resp.getHits().getTotalHits().value, equalTo(1L));
+            assertThat(resp.getHits().getTotalHits().value, equalTo(5L));
             assertThat(resp.getHits().getTotalHits().relation, equalTo(TotalHits.Relation.EQUAL_TO));
+            assertThat(resp.getHits().getHits().length, equalTo(1));
             assertThat(resp.getHits().getAt(0).getId(), equalTo("doc_2"));
             assertNotNull(resp.getAggregations());
             assertNotNull(resp.getAggregations().get("topic"));
@@ -291,8 +298,10 @@ public void testRankDocsRetrieverWithCollapse() {
         SearchSourceBuilder source = new SearchSourceBuilder();
         StandardRetrieverBuilder standard0 = new StandardRetrieverBuilder();
         // this one retrieves docs 1, 4, and 6
-        standard0.queryBuilder = QueryBuilders.constantScoreQuery(QueryBuilders.queryStringQuery("quick").defaultField(TEXT_FIELD))
-            .boost(10L);
+        standard0.queryBuilder = QueryBuilders.boolQuery()
+            .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_1")).boost(10L))
+            .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_4")).boost(9L))
+            .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(8L));
         StandardRetrieverBuilder standard1 = new StandardRetrieverBuilder();
         // this one retrieves docs 2 and 6 due to prefilter
         standard1.queryBuilder = QueryBuilders.constantScoreQuery(QueryBuilders.termsQuery(ID_FIELD, "doc_2", "doc_3", "doc_6")).boost(20L);
@@ -307,8 +316,8 @@ public void testRankDocsRetrieverWithCollapse() {
             null
         );
         // the compound retriever here produces a score for a doc based on the percentage of the queries that it was matched on and
-        // resolves ties based on actual score, rank, and then the doc (we're forcing 1 shard for consistent results)
-        // so ideal rank would be: 6, 2, 1, 4, 7, 3
+        // resolves ties based on the actual score and then on the doc (we're forcing 1 shard for consistent results),
+        // so the ideal rank would be: 6, 2, 1, 3, 4, 7
         // with collapsing on topic field we would have 6, 2, 1, 7
         source.retriever(
             new CompoundRetrieverWithRankDocs(
@@ -338,7 +347,6 @@ public void testRankDocsRetrieverWithCollapse() {
             assertThat(resp.getHits().getAt(1).field(TOPIC_FIELD).getValue().toString(), equalTo("astronomy"));
             assertThat(resp.getHits().getAt(2).getId(), equalTo("doc_1"));
             assertThat(resp.getHits().getAt(2).field(TOPIC_FIELD).getValue().toString(), equalTo("technology"));
-            assertThat(resp.getHits().getAt(2).getInnerHits().get("a").getHits().length, equalTo(3));
             assertThat(resp.getHits().getAt(2).getInnerHits().get("a").getAt(0).getId(), equalTo("doc_4"));
             assertThat(resp.getHits().getAt(2).getInnerHits().get("a").getAt(1).getId(), equalTo("doc_3"));
             assertThat(resp.getHits().getAt(2).getInnerHits().get("a").getAt(2).getId(), equalTo("doc_1"));
@@ -347,17 +355,15 @@ public void testRankDocsRetrieverWithCollapse() {
         });
     }
 
-    public void testRankDocsRetrieverWithCollapseAndAggs() {
-        // same as above, but we only want to bring back the top result from each subsearch
-        // so that would be 1, 2, and 7
-        // and final rank would be (based on score): 2, 1, 7
-        // aggs should still account for the same docs as the testRankDocsRetriever test, i.e. all but doc_5
+    public void testRankDocsRetrieverWithNestedCollapseAndAggs() {
         final int rankWindowSize = 10;
         SearchSourceBuilder source = new SearchSourceBuilder();
         StandardRetrieverBuilder standard0 = new StandardRetrieverBuilder();
         // this one retrieves docs 1 and 6 as doc_4 is collapsed to doc_1
-        standard0.queryBuilder = QueryBuilders.constantScoreQuery(QueryBuilders.queryStringQuery("quick").defaultField(TEXT_FIELD))
-            .boost(10L);
+        standard0.queryBuilder = QueryBuilders.boolQuery()
+            .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_1")).boost(10L))
+            .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_4")).boost(9L))
+            .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(8L));
         standard0.collapseBuilder = new CollapseBuilder(TOPIC_FIELD).setInnerHits(
             new InnerHitBuilder("a").addSort(new FieldSortBuilder(DOC_FIELD).order(SortOrder.DESC)).setSize(10)
         );
@@ -375,8 +381,8 @@ public void testRankDocsRetrieverWithCollapseAndAggs() {
             null
         );
         // the compound retriever here produces a score for a doc based on the percentage of the queries that it was matched on and
-        // resolves ties based on actual score, rank, and then the doc (we're forcing 1 shard for consistent results)
-        // so ideal rank would be: 6, 2, 1, 4, 7, 3
+        // resolves ties based on the actual score and then on the doc (we're forcing 1 shard for consistent results),
+        // so the ideal rank would be: 6, 2, 1, 3, 4, 7
         source.retriever(
             new CompoundRetrieverWithRankDocs(
                 rankWindowSize,
@@ -392,7 +398,7 @@ public void testRankDocsRetrieverWithCollapseAndAggs() {
         ElasticsearchAssertions.assertResponse(req, resp -> {
             assertNull(resp.pointInTimeId());
             assertNotNull(resp.getHits().getTotalHits());
-            assertThat(resp.getHits().getTotalHits().value, equalTo(5L));
+            assertThat(resp.getHits().getTotalHits().value, equalTo(6L));
             assertThat(resp.getHits().getTotalHits().relation, equalTo(TotalHits.Relation.EQUAL_TO));
             assertThat(resp.getHits().getAt(0).getId(), equalTo("doc_6"));
             assertNotNull(resp.getAggregations());
@@ -427,8 +433,8 @@ public void testRankDocsRetrieverWithNestedQuery() {
             null
         );
         // the compound retriever here produces a score for a doc based on the percentage of the queries that it was matched on and
-        // resolves ties based on actual score, rank, and then the doc (we're forcing 1 shard for consistent results)
-        // so ideal rank would be: 6, 2, 1, 4, 3, 7
+        // resolves ties based on the actual score and then on the doc (we're forcing 1 shard for consistent results),
+        // so the ideal rank would be: 6, 2, 1, 3, 4, 7
         source.retriever(
             new CompoundRetrieverWithRankDocs(
                 rankWindowSize,
@@ -460,8 +466,10 @@ public void testRankDocsRetrieverMultipleCompoundRetrievers() {
         SearchSourceBuilder source = new SearchSourceBuilder();
         StandardRetrieverBuilder standard0 = new StandardRetrieverBuilder();
         // this one retrieves docs 1, 4, and 6
-        standard0.queryBuilder = QueryBuilders.constantScoreQuery(QueryBuilders.queryStringQuery("quick").defaultField(TEXT_FIELD))
-            .boost(10L);
+        standard0.queryBuilder = QueryBuilders.boolQuery()
+            .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_1")).boost(10L))
+            .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_4")).boost(9L))
+            .should(QueryBuilders.constantScoreQuery(QueryBuilders.idsQuery().addIds("doc_6")).boost(8L));
         StandardRetrieverBuilder standard1 = new StandardRetrieverBuilder();
         // this one retrieves docs 2 and 6 due to prefilter
         standard1.queryBuilder = QueryBuilders.constantScoreQuery(QueryBuilders.termsQuery(ID_FIELD, "doc_2", "doc_3", "doc_6")).boost(20L);
@@ -506,11 +514,11 @@ public void testRankDocsRetrieverMultipleCompoundRetrievers() {
             assertThat(resp.getHits().getTotalHits().value, equalTo(6L));
             assertThat(resp.getHits().getTotalHits().relation, equalTo(TotalHits.Relation.EQUAL_TO));
             assertThat(resp.getHits().getAt(0).getId(), equalTo("doc_4"));
-            assertThat(resp.getHits().getAt(1).getId(), equalTo("doc_6"));
+            assertThat(resp.getHits().getAt(1).getId(), equalTo("doc_1"));
             assertThat(resp.getHits().getAt(2).getId(), equalTo("doc_2"));
-            assertThat(resp.getHits().getAt(3).getId(), equalTo("doc_1"));
-            assertThat(resp.getHits().getAt(4).getId(), equalTo("doc_7"));
-            assertThat(resp.getHits().getAt(5).getId(), equalTo("doc_3"));
+            assertThat(resp.getHits().getAt(3).getId(), equalTo("doc_3"));
+            assertThat(resp.getHits().getAt(4).getId(), equalTo("doc_6"));
+            assertThat(resp.getHits().getAt(5).getId(), equalTo("doc_7"));
         });
     }
 
@@ -545,9 +553,9 @@ public void testRankDocsRetrieverDifferentNestedSorting() {
             assertThat(resp.getHits().getTotalHits().relation, equalTo(TotalHits.Relation.EQUAL_TO));
             assertThat(resp.getHits().getAt(0).getId(), equalTo("doc_4"));
             assertThat(resp.getHits().getAt(1).getId(), equalTo("doc_1"));
-            assertThat(resp.getHits().getAt(2).getId(), equalTo("doc_7"));
+            assertThat(resp.getHits().getAt(2).getId(), equalTo("doc_2"));
             assertThat(resp.getHits().getAt(3).getId(), equalTo("doc_6"));
-            assertThat(resp.getHits().getAt(4).getId(), equalTo("doc_2"));
+            assertThat(resp.getHits().getAt(4).getId(), equalTo("doc_7"));
         });
     }
 
@@ -673,22 +681,14 @@ private RankDoc[] getRankDocs(SearchResponse searchResponse) {
             for (int i = 0; i < size; i++) {
                 var hit = searchResponse.getHits().getAt(i);
                 long sortValue = (long) hit.getRawSortValues()[hit.getRawSortValues().length - 1];
-                int doc = decodeDoc(sortValue);
-                int shardRequestIndex = decodeShardRequestIndex(sortValue);
+                int doc = ShardDocSortField.decodeDoc(sortValue);
+                int shardRequestIndex = ShardDocSortField.decodeShardRequestIndex(sortValue);
                 docs[i] = new RankDoc(doc, hit.getScore(), shardRequestIndex);
                 docs[i].rank = i + 1;
             }
             return docs;
         }
 
-        public static int decodeDoc(long value) {
-            return (int) value;
-        }
-
-        public static int decodeShardRequestIndex(long value) {
-            return (int) (value >> 32);
-        }
-
         record RankDocAndHitRatio(RankDoc rankDoc, float hitRatio) {}
 
         /**

diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java
@@ -231,6 +231,7 @@ static TransportVersion def(int id) {
     public static final TransportVersion CCS_REMOTE_TELEMETRY_STATS = def(8_755_00_0);
     public static final TransportVersion ESQL_CCS_EXECUTION_INFO = def(8_756_00_0);
     public static final TransportVersion REGEX_AND_RANGE_INTERVAL_QUERIES = def(8_757_00_0);
+    public static final TransportVersion RRF_QUERY_REWRITE = def(8_758_00_0);
 
     /*
      * STOP! READ THIS FIRST! No, really,

diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java
@@ -502,6 +502,8 @@ void executeRequest(
         });
         final SearchSourceBuilder source = original.source();
         if (shouldOpenPIT(source)) {
+            // raise the pre-filter shard size to its maximum to disable shard reordering for this request
+            original.setPreFilterShardSize(Integer.MAX_VALUE);
             openPIT(client, original, searchService.getDefaultKeepAliveInMillis(), listener.delegateFailureAndWrap((delegate, resp) -> {
                 // We set the keep alive to -1 to indicate that we don't need the pit id in the response.
                 // This is needed since we delete the pit prior to sending the response so the id doesn't exist anymore.

diff --git a/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java
@@ -74,7 +74,6 @@
 import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.lucene.grouping.TopFieldGroups;
-import org.elasticsearch.search.retriever.rankdoc.RankDocsSortField;
 import org.elasticsearch.search.sort.ShardDocSortField;
 
 import java.io.IOException;
@@ -553,8 +552,6 @@ private static SortField rewriteMergeSortField(SortField sortField) {
             return newSortField;
         } else if (sortField.getClass() == ShardDocSortField.class) {
             return new SortField(sortField.getField(), SortField.Type.LONG, sortField.getReverse());
-        } else if (sortField.getClass() == RankDocsSortField.class) {
-            return new SortField(sortField.getField(), SortField.Type.INT, sortField.getReverse());
         } else {
             return sortField;
         }

diff --git a/...er/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java b/...er/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java
@@ -260,7 +260,7 @@ public void visitLeaf(Query leafQuery) {
                  * KnnScoreDocQuery and RankDocsQuery requires the same reader that built the docs
                  * When using {@link HighlightFlag#WEIGHT_MATCHES} different readers are used and isn't supported by this query
                  */
-                if (leafQuery instanceof KnnScoreDocQuery || leafQuery instanceof RankDocsQuery) {
+                if (leafQuery instanceof KnnScoreDocQuery || leafQuery instanceof RankDocsQuery.TopQuery) {
                     hasUnknownLeaf[0] = true;
                 }
                 super.visitLeaf(query);