Update test_reranking_service to try and parse provided inputs as scores (elastic#122328) (elastic#129730)

ioanatia · pmpailis · web-flow · commit 45420dcaeb5c · 2025-06-19T20:03:27.000+01:00
Co-authored-by: Panagiotis Bailis <pmpailis@gmail.com>
diff --git a/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestRerankingServiceExtension.java b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestRerankingServiceExtension.java
@@ -36,6 +36,7 @@
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Comparator;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.List;
@@ -148,16 +149,28 @@ public void chunkedInfer(
         }
 
         private RankedDocsResults makeResults(List<String> input) {
-            List<RankedDocsResults.RankedDoc> results = new ArrayList<>();
             int totalResults = input.size();
-            float minScore = random.nextFloat(-1f, 1f);
-            float resultDiff = 0.2f;
-            for (int i = 0; i < input.size(); i++) {
-                results.add(
-                    new RankedDocsResults.RankedDoc(totalResults - 1 - i, minScore + resultDiff * (totalResults - i), input.get(i))
-                );
+            try {
+                List<RankedDocsResults.RankedDoc> results = new ArrayList<>();
+                for (int i = 0; i < totalResults; i++) {
+                    results.add(new RankedDocsResults.RankedDoc(i, Float.parseFloat(input.get(i)), input.get(i)));
+                }
+                return new RankedDocsResults(results.stream().sorted(Comparator.reverseOrder()).toList());
+            } catch (NumberFormatException ex) {
+                List<RankedDocsResults.RankedDoc> results = new ArrayList<>();
+                float minScore = random.nextFloat(-1f, 1f);
+                float resultDiff = 0.2f;
+                for (int i = 0; i < input.size(); i++) {
+                    results.add(
+                        new RankedDocsResults.RankedDoc(
+                            totalResults - 1 - i,
+                            minScore + resultDiff * (totalResults - i),
+                            input.get(totalResults - 1 - i)
+                        )
+                    );
+                }
+                return new RankedDocsResults(results);
             }
-            return new RankedDocsResults(results);
         }
 
         protected ServiceSettings getServiceSettingsFromMap(Map<String, Object> serviceSettingsMap) {
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java
@@ -28,6 +28,9 @@ public class InferenceFeatures implements FeatureSpecification {
     private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER = new NodeFeature("semantic_text.highlighter");
     private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT = new NodeFeature("semantic_text.highlighter.default");
     private static final NodeFeature SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER = new NodeFeature("semantic_text.match_all_highlighter");
+    private static final NodeFeature TEST_RERANKING_SERVICE_PARSE_TEXT_AS_SCORE = new NodeFeature(
+        "test_reranking_service.parse_text_as_score"
+    );
 
     @Override
     public Set<NodeFeature> getTestFeatures() {
@@ -47,7 +50,8 @@ public Set<NodeFeature> getTestFeatures() {
             SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT,
             SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT,
             SEMANTIC_KNN_FILTER_FIX,
-            SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER
+            SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER,
+            TEST_RERANKING_SERVICE_PARSE_TEXT_AS_SCORE
         );
     }
 }
diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml
@@ -2,7 +2,6 @@ setup:
   - skip:
       features:
         - close_to
-        - contains
   - requires:
       test_runner_features: "close_to"
 
@@ -33,16 +32,8 @@ setup:
                 type: keyword
               subtopic:
                 type: keyword
-
-  - do:
-      index:
-        index: test-index
-        id: doc_1
-        body:
-          text: "As seen from Earth, a solar eclipse happens when the Moon is directly between the Earth and the Sun."
-          topic: [ "science" ]
-          subtopic: [ "technology" ]
-        refresh: true
+              inference_text_field:
+                type: text
 
   - do:
       index:
@@ -52,6 +43,7 @@ setup:
           text: "The phases of the Moon come from the position of the Moon relative to the Earth and Sun."
           topic: [ "science" ]
           subtopic: [ "astronomy" ]
+          inference_text_field: "0"
         refresh: true
 
   - do:
@@ -61,11 +53,27 @@ setup:
         body:
           text: "Sun Moon Lake is a lake in Nantou County, Taiwan. It is the largest lake in Taiwan."
           topic: [ "geography" ]
+          inference_text_field: "1"
+        refresh: true
+
+  - do:
+      index:
+        index: test-index
+        id: doc_1
+        body:
+          text: "As seen from Earth, a solar eclipse happens when the Moon is directly between the Earth and the Sun."
+          topic: [ "science" ]
+          subtopic: [ "technology" ]
+          inference_text_field: "-1"
         refresh: true
 
 ---
 "Simple text similarity rank retriever":
 
+  - requires:
+      cluster_features: "test_reranking_service.parse_text_as_score"
+      reason: test_reranking_service can now parse provided input as score to provide deterministic ranks
+
   - do:
       search:
         index: test-index
@@ -75,14 +83,37 @@ setup:
           retriever:
             text_similarity_reranker:
               retriever:
+                # this one returns docs 1 and 2
                 standard:
                   query:
-                    term:
-                      topic: "science"
+                    bool: {
+                      should: [
+                        {
+                          constant_score: {
+                            filter: {
+                              term: {
+                                subtopic: "technology"
+                              }
+                            },
+                            boost: 10
+                          }
+                        },
+                        {
+                          constant_score: {
+                            filter: {
+                              term: {
+                                subtopic: "astronomy"
+                              }
+                            },
+                            boost: 1
+                          }
+                        }
+                      ]
+                    }
               rank_window_size: 10
               inference_id: my-rerank-model
               inference_text: "How often does the moon hide the sun?"
-              field: text
+              field: inference_text_field
           size: 10
 
   - match: { hits.total.value: 2 }
@@ -94,6 +125,10 @@ setup:
 ---
 "Simple text similarity rank retriever and filtering":
 
+  - requires:
+      cluster_features: "test_reranking_service.parse_text_as_score"
+      reason: test_reranking_service can now parse provided input as score to provide deterministic ranks
+
   - do:
       search:
         index: test-index
@@ -103,6 +138,7 @@ setup:
           retriever:
             text_similarity_reranker:
               retriever:
+                # this one returns doc 1
                 standard:
                   query:
                     term:
@@ -113,7 +149,7 @@ setup:
               rank_window_size: 10
               inference_id: my-rerank-model
               inference_text: "How often does the moon hide the sun?"
-              field: text
+              field: inference_text_field
           size: 10
 
   - match: { hits.total.value: 1 }
@@ -143,7 +179,7 @@ setup:
               rank_window_size: 10
               inference_id: i-dont-exist
               inference_text: "How often does the moon hide the sun?"
-              field: text
+              field: inference_text_field
           size: 10
 
 ---
@@ -169,13 +205,17 @@ setup:
               rank_window_size: 10
               inference_id: i-dont-exist
               inference_text: "asdfasdf"
-              field: text
+              field: inference_text_field
           size: 10
 
 
 ---
 "text similarity reranking with explain":
 
+  - requires:
+      cluster_features: "test_reranking_service.parse_text_as_score"
+      reason: test_reranking_service can now parse provided input as score to provide deterministic ranks
+
   - do:
       search:
         index: test-index
@@ -186,28 +226,50 @@ setup:
             text_similarity_reranker: {
               retriever:
                 {
+                  # this one returns docs 1 and 2
                   standard: {
                     query: {
-                      term: {
-                        topic: "science"
+                      bool: {
+                        should: [
+                          {
+                            constant_score: {
+                              filter: {
+                                term: {
+                                  subtopic: "technology"
+                                }
+                              },
+                              boost: 10
+                            }
+                          },
+                          {
+                            constant_score: {
+                              filter: {
+                                term: {
+                                  subtopic: "astronomy"
+                                }
+                              },
+                              boost: 1
+                            }
+                          }
+                        ]
                       }
                     }
                   }
                 },
               rank_window_size: 10,
               inference_id: my-rerank-model,
               inference_text: "How often does the moon hide the sun?",
-              field: text
+              field: inference_text_field
             }
           }
           size: 10
           explain: true
 
-  - contains: { hits.hits: { _id: "doc_2" } }
-  - contains: { hits.hits: { _id: "doc_1" } }
+  - match: { hits.hits.0._id: "doc_2" }
+  - match: { hits.hits.1._id: "doc_1" }
 
-  - match: {hits.hits.0._explanation.description: "/text_similarity_reranker.match.using.inference.endpoint:.\\[my-rerank-model\\].on.document.field:.\\[text\\].*/" }
-  - match: {hits.hits.0._explanation.details.0.description: "/weight.*science.*/" }
+  - match: {hits.hits.0._explanation.description: "/text_similarity_reranker.match.using.inference.endpoint:.\\[my-rerank-model\\].on.document.field:.\\[inference_text_field\\].*/" }
+  - match: {hits.hits.0._explanation.details.0.details.0.description: "/subtopic.*astronomy.*/" }
 
 ---
 "text similarity reranker properly handles aliases":
@@ -281,7 +343,7 @@ setup:
               rank_window_size: 10
               inference_id: my-rerank-model
               inference_text: "How often does the moon hide the sun?"
-              field: text
+              field: inference_text_field
           size: 10
 
   - match: { hits.total.value: 1 }
diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/800_rrf_with_text_similarity_reranker_retriever.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/800_rrf_with_text_similarity_reranker_retriever.yml