pawankartik-elastic
diff --git a/‎benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java‎
Lines changed: 2 additions & 1 deletion b/‎benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎docs/changelog/139074.yaml‎
Lines changed: 5 additions & 0 deletions b/‎docs/changelog/139074.yaml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎docs/reference/query-languages/esql/_snippets/commands/layout/completion.md‎
Lines changed: 33 additions & 1 deletion b/‎docs/reference/query-languages/esql/_snippets/commands/layout/completion.md‎
Lines changed: 33 additions & 1 deletion
diff --git a/‎docs/reference/query-languages/esql/_snippets/commands/layout/rerank.md‎
Lines changed: 47 additions & 0 deletions b/‎docs/reference/query-languages/esql/_snippets/commands/layout/rerank.md‎
Lines changed: 47 additions & 0 deletions
diff --git a/‎docs/reference/query-languages/esql/_snippets/functions/examples/chunk.md‎
Lines changed: 9 additions & 27 deletions b/‎docs/reference/query-languages/esql/_snippets/functions/examples/chunk.md‎
Lines changed: 9 additions & 27 deletions
diff --git a/‎docs/reference/query-languages/esql/_snippets/lists/string-functions.md‎
Lines changed: 1 addition & 0 deletions b/‎docs/reference/query-languages/esql/_snippets/lists/string-functions.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/reference/query-languages/esql/kibana/definition/functions/chunk.json‎
Lines changed: 1 addition & 2 deletions b/‎docs/reference/query-languages/esql/kibana/definition/functions/chunk.json‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎docs/reference/query-languages/esql/kibana/docs/functions/chunk.md‎
Lines changed: 2 additions & 2 deletions b/‎docs/reference/query-languages/esql/kibana/docs/functions/chunk.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎muted-tests.yml‎
Lines changed: 9 additions & 0 deletions b/‎muted-tests.yml‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java‎
Lines changed: 5 additions & 4 deletions b/‎x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java‎
Lines changed: 5 additions & 4 deletions
@@ -27,6 +27,7 @@
 import org.elasticsearch.xpack.esql.index.EsIndex;
 import org.elasticsearch.xpack.esql.index.IndexResolution;
 import org.elasticsearch.xpack.esql.inference.InferenceResolution;
+import org.elasticsearch.xpack.esql.inference.InferenceSettings;
 import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext;
 import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer;
 import org.elasticsearch.xpack.esql.parser.EsqlParser;
@@ -126,7 +127,7 @@ public void setup() {
     }
 
     private LogicalPlan plan(EsqlParser parser, Analyzer analyzer, LogicalPlanOptimizer optimizer, String query) {
-        var parsed = parser.parseQuery(query, new QueryParams(), telemetry);
+        var parsed = parser.parseQuery(query, new QueryParams(), telemetry, new InferenceSettings(Settings.EMPTY));
         var analyzed = analyzer.analyze(parsed);
         var optimized = optimizer.optimize(analyzed);
         return optimized;
 
@@ -0,0 +1,5 @@
+pr: 139074
+summary: "[ESQL][Inference] Introduce usage limits for COMPLETION and RERANK"
+area: ES|QL
+type: enhancement
+issues: []
@@ -6,9 +6,38 @@ stack: preview 9.1.0
 
 The `COMPLETION` command allows you to send prompts and context to a Large Language Model (LLM) directly within your ES|QL queries, to perform text generation tasks.
 
-:::{important}
+:::::{important}
 **Every row processed by the COMPLETION command generates a separate API call to the LLM endpoint.**
 
+::::{tab-set}
+
+:::{tab-item} 9.3.0+
+
+Starting in version 9.3.0, `COMPLETION` automatically limits processing to **100 rows by default** to prevent accidental high consumption and costs. This limit is applied before the `COMPLETION` command executes.
+
+If you need to process more rows, you can adjust the limit using the cluster setting:
+```console
+PUT _cluster/settings
+{
+  "persistent": {
+    "esql.command.completion.limit": 500
+  }
+}
+```
+
+You can also disable the command entirely if needed:
+```console
+PUT _cluster/settings
+{
+  "persistent": {
+    "esql.command.completion.enabled": false
+  }
+}
+```
+:::
+
+:::{tab-item} 9.1.x - 9.2.x
+
 Be careful to test with small datasets first before running on production data or in automated workflows, to avoid unexpected costs.
 
 Best practices:
@@ -19,6 +48,9 @@ Best practices:
 4. **Monitor usage**: Track your LLM API consumption and costs.
 :::
 
+::::
+:::::
+
 **Syntax**
 
 ::::{tab-set}
 
@@ -7,6 +7,53 @@ stack: preview 9.2.0
 The `RERANK` command uses an inference model to compute a new relevance score
 for an initial set of documents, directly within your ES|QL queries.
 
+:::::{important}
+**RERANK processes each row through an inference model, which impacts performance and costs.**
+
+::::{tab-set}
+
+:::{tab-item} 9.3.0+
+
+Starting in version 9.3.0, `RERANK` automatically limits processing to **1000 rows by default** to prevent accidental high consumption. This limit is applied before the `RERANK` command executes.
+
+If you need to process more rows, you can adjust the limit using the cluster setting:
+```console
+PUT _cluster/settings
+{
+  "persistent": {
+    "esql.command.rerank.limit": 5000
+  }
+}
+```
+
+You can also disable the command entirely if needed:
+```console
+PUT _cluster/settings
+{
+  "persistent": {
+    "esql.command.rerank.enabled": false
+  }
+}
+```
+:::
+
+:::{tab-item} 9.2.x
+
+No automatic row limit is applied. **You should always use `LIMIT` before or after `RERANK` to control the number of documents processed**, to avoid accidentally reranking large datasets which can result in high latency and increased costs.
+
+For example:
+```esql
+FROM books
+| WHERE title:"search query"
+| SORT _score DESC
+| LIMIT 100  // Limit to top 100 results before reranking
+| RERANK "search query" ON title WITH { "inference_id" : "my_rerank_endpoint" }
+```
+:::
+
+::::
+:::::
+
 **Syntax**
 
 ```esql
 
@@ -1,5 +1,6 @@
 * [`BIT_LENGTH`](../../functions-operators/string-functions.md#esql-bit_length)
 * [`BYTE_LENGTH`](../../functions-operators/string-functions.md#esql-byte_length)
+* [`CHUNK`](../../functions-operators/string-functions.md#esql-chunk)
 * [`CONCAT`](../../functions-operators/string-functions.md#esql-concat)
 * [`CONTAINS`](../../functions-operators/string-functions.md#esql-contains)
 * [`ENDS_WITH`](../../functions-operators/string-functions.md#esql-ends_with)
 
@@ -475,6 +475,15 @@ tests:
 - class: org.elasticsearch.xpack.esql.ccq.MultiClusterSpecIT
   method: test {csv-spec:spatial.ConvertFromStringParseError}
   issue: https://github.com/elastic/elasticsearch/issues/139213
+- class: org.elasticsearch.test.rest.yaml.RcsCcsCommonYamlTestSuiteIT
+  method: test {p0=search.vectors/180_update_dense_vector_type/Test update flat --> bbq_flat --> bbq_hnsw}
+  issue: https://github.com/elastic/elasticsearch/issues/139253
+- class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT
+  method: test {p0=search.vectors/41_knn_search_half_byte_quantized_bfloat16/Knn search with mip}
+  issue: https://github.com/elastic/elasticsearch/issues/139254
+- class: org.elasticsearch.xpack.security.authc.saml.SamlServiceProviderMetadataIT
+  method: testAuthenticationWhenMetadataIsUnreliable
+  issue: https://github.com/elastic/elasticsearch/issues/139067
 
 # Examples:
 #
 
@@ -370,7 +370,7 @@ public enum DataType implements Writeable {
         builder().esType("exponential_histogram")
             .estimatedSize(16 * 160)// guess 160 buckets (OTEL default for positive values only histograms) with 16 bytes per bucket
             .docValues()
-            .underConstruction(DataTypesTransportVersions.RESOLVE_FIELDS_RESPONSE_USED_TV)
+            .underConstruction(DataTypesTransportVersions.TEXT_SIMILARITY_RANK_DOC_EXPLAIN_CHUNKS_VERSION)
     ),
 
     /*
@@ -1043,10 +1043,11 @@ public static class DataTypesTransportVersions {
         );
 
         /**
-         * First transport version after the PR that introduced the exponential histogram data type.
+         * First transport version, after the PR that introduced the exponential histogram data type, that was NOT also backported to 9.2.
+         * (Exp. histogram was added as SNAPSHOT-only to 9.3.)
          */
-        public static final TransportVersion RESOLVE_FIELDS_RESPONSE_USED_TV = TransportVersion.fromName(
-            "esql_resolve_fields_response_used"
+        public static final TransportVersion TEXT_SIMILARITY_RANK_DOC_EXPLAIN_CHUNKS_VERSION = TransportVersion.fromName(
+            "text_similarity_rank_docs_explain_chunks"
         );
     }
 }