elastic
diff --git a/‎x-pack/platform/plugins/shared/inference/scripts/evaluation/scenarios/esql/index.spec.ts‎
Lines changed: 453 additions & 9 deletions b/‎x-pack/platform/plugins/shared/inference/scripts/evaluation/scenarios/esql/index.spec.ts‎
Lines changed: 453 additions & 9 deletions
diff --git a/‎x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/load_esql_docs.ts‎
Lines changed: 3 additions & 3 deletions b/‎x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/load_esql_docs.ts‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎x-pack/platform/plugins/shared/inference/scripts/util/kibana_client.ts‎
Lines changed: 2 additions & 2 deletions b/‎x-pack/platform/plugins/shared/inference/scripts/util/kibana_client.ts‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/doc_base/suggestions.ts‎
Lines changed: 5 additions & 0 deletions b/‎x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/doc_base/suggestions.ts‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-bit_length.txt‎
Lines changed: 24 additions & 0 deletions b/‎x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-bit_length.txt‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-bucket.txt‎
Lines changed: 16 additions & 6 deletions b/‎x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-bucket.txt‎
Lines changed: 16 additions & 6 deletions
diff --git a/‎x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-byte_length.txt‎
Lines changed: 22 additions & 0 deletions b/‎x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-byte_length.txt‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-case.txt‎
Lines changed: 9 additions & 5 deletions b/‎x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-case.txt‎
Lines changed: 9 additions & 5 deletions
diff --git a/‎x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-categorize.txt‎
Lines changed: 30 additions & 0 deletions b/‎x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-categorize.txt‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_extract.txt‎
Lines changed: 1 addition & 1 deletion b/‎x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_extract.txt‎
Lines changed: 1 addition & 1 deletion
@@ -69,7 +69,7 @@ yargs(process.argv.slice(2))
           });
           log.info(`Using connector ${connector.connectorId}`);
 
-          const chatClient = kibanaClient.createInferenceClient({
+          const inferenceClient = kibanaClient.createInferenceClient({
             connectorId: connector.connectorId,
           });
 
@@ -84,14 +84,14 @@ yargs(process.argv.slice(2))
           log.info(`Retrieving and converting documentation from ${builtDocsDir}...`);
           const extraction = await extractDocEntries({
             builtDocsDir,
-            inferenceClient: chatClient,
+            inferenceClient,
             log,
           });
 
           log.info(`Rewriting documentation...`);
           const docFiles = await generateDoc({
             extraction,
-            inferenceClient: chatClient,
+            inferenceClient,
             log,
           });
 
 
@@ -212,10 +212,10 @@ export class KibanaClient {
         return this.axios
           .post(
             this.getUrl({
-              pathname: `/internal/inference/chat_complete/stream`,
+              pathname: `/internal/inference/chat_complete`,
             }),
             body,
-            { responseType: 'stream', timeout: NaN }
+            { timeout: NaN }
           )
           .then((response) => {
             return response.data;
 
@@ -13,6 +13,11 @@ const suggestions: Suggestion[] = [
       return ['BUCKET'];
     }
   },
+  (keywords) => {
+    if (keywords.includes('TO_DATETIME')) {
+      return ['DATE_PARSE'];
+    }
+  },
 ];
 
 /**
 
@@ -0,0 +1,24 @@
+# BIT_LENGTH
+
+This function calculates the bit length of a given string.
+
+## Syntax
+
+`BIT_LENGTH(string)`
+
+### Parameters
+
+#### string
+
+This is the string whose bit length you want to calculate. If `null` is provided, the function will return `null`.
+
+**Note**: Strings are in UTF-8 format, which means a single character may occupy multiple bytes.
+
+## Examples
+
+```esql
+FROM airports
+| WHERE country == "India"
+| KEEP city
+| EVAL fn_length = LENGTH(city), fn_bit_length = BIT_LENGTH(city)
+```
@@ -30,8 +30,7 @@ BUCKET can operate in two modes:
 - one where the bucket size is computed based on a bucket count recommendation and a range,
 - and another where the bucket size is provided directly.
 
-When the bucket size is provided directly for time interval,
-it is expressed as a *timespan literal*, e.g.
+When the bucket size is provided directly for a time interval, it is expressed as a **timespan literal**, e.g.
 - GOOD: `BUCKET(@timestamp, 1 month)`
 - BAD: `BUCKET(@timestamp, "month")`
 
@@ -74,29 +73,40 @@ FROM employees
 
 More examples:
 
+*Group employees into buckets based on salary and count the employees in each bucket*
 ```esql
 FROM employees
 | WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
-| STATS c = COUNT(1) BY b = BUCKET(salary, 5000.)
+| STATS c = COUNT(*) BY b = BUCKET(salary, 5000.)
 | SORT b
 ```
 
+*Group data emitted over the last 24h into 25 buckets*
 ```esql
 FROM sample_data
 | WHERE @timestamp >= NOW() - 1 day and @timestamp < NOW()
 | STATS COUNT(*) BY bucket = BUCKET(@timestamp, 25, NOW() - 1 day, NOW())
 ```
 
+*Similar to the previous example, but with a fixed 1-hour bucket size*
+```esql
+FROM sample_data
+| WHERE @timestamp >= NOW() - 1 day and @timestamp < NOW()
+| STATS COUNT(*) BY bucket = BUCKET(@timestamp, 1 hour)
+```
+
+*Group employees into 20 buckets based on their hire_date, then calculate the average salary for each bucket*
 ```esql
 FROM employees
 | WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
 | STATS AVG(salary) BY bucket = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
 | SORT bucket
 ```
 
+*Similar to the previous example, but using a fixed 1-month bucket size*
 ```esql
 FROM employees
-| STATS s1 = BUCKET(salary / 1000 + 999, 50.) + 2 BY b1 = BUCKET(salary / 100 + 99, 50.), b2 = BUCKET(salary / 1000 + 999, 50.)
-| SORT b1, b2
-| KEEP b1, s1, b2
+| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
+| STATS AVG(salary) BY bucket = BUCKET(hire_date, 1 month)
+| SORT bucket
 ```
@@ -0,0 +1,22 @@
+# BYTE_LENGTH
+
+This function calculates the byte length of a given string.
+
+## Syntax
+
+`BYTE_LENGTH(string)`
+
+### Parameters
+
+#### string
+
+The text string for which the byte length is to be determined. If `null` is provided, the function will return `null`.
+
+## Examples
+
+```esql
+FROM airports
+| WHERE country == "India"
+| KEEP city
+| EVAL fn_length = LENGTH(city), fn_byte_length = BYTE_LENGTH(city)
+```
@@ -4,7 +4,7 @@ The CASE function accepts pairs of conditions and values. It returns the value t
 
 ## Syntax
 
-`CASE(condition, trueValue)`
+`CASE(condition, trueValue, elseValue)`
 
 ### Parameters
 
@@ -16,16 +16,20 @@ A condition to evaluate.
 
 The value that is returned when the corresponding condition is the first to evaluate to `true`. If no condition matches, the default value is returned.
 
+#### elseValue
+
+The value that will be returned when no condition evaluates to `true`.
+
 ## Examples
 
-Determine whether employees are monolingual, bilingual, or polyglot:
+In this example, employees are categorized as monolingual, bilingual, or polyglot depending on how many languages they speak:
 
 ```esql
 FROM employees
 | EVAL type = CASE(
     languages <= 1, "monolingual",
     languages <= 2, "bilingual",
-     "polyglot")
+    "polyglot")
 | KEEP emp_no, languages, type
 ```
 
@@ -46,6 +50,6 @@ Calculate an hourly error rate as a percentage of the total number of log messag
 FROM sample_data
 | EVAL error = CASE(message LIKE "*error*", 1, 0)
 | EVAL hour = DATE_TRUNC(1 hour, @timestamp)
-| STATS error_rate = AVG(error) by hour
+| STATS error_rate = AVG(error) BY hour
 | SORT hour
-```
+```
@@ -0,0 +1,30 @@
+# CATEGORIZE
+
+The `CATEGORIZE` function organizes textual data into groups of similar format.
+
+> **Note:** The `CATEGORIZE` function is currently in technical preview and may undergo changes or be removed in future releases.
+
+## Syntax
+
+`CATEGORIZE(field)`
+
+### Parameters
+
+#### field
+
+The expression that is to be categorized.
+
+## Examples
+
+The following example demonstrates how to use `CATEGORIZE` to group server log messages into categories and then aggregate their counts.
+
+```esql
+FROM sample_data
+| STATS count = COUNT() BY category=CATEGORIZE(message)
+```
+
+## Limitations
+
+- `CATEGORIZE` can't be used within other expressions
+- `CATEGORIZE` can't be used with multiple groupings
+- `CATEGORIZE` can't be used or referenced within aggregate functions
@@ -10,7 +10,7 @@ The DATE_EXTRACT function is used to extract specific parts of a date.
 
 #### datePart
 
-This is the part of the date you want to extract, such as "year", "month" or ""hour_of_day".
+This is the part of the date you want to extract, such as "year", "month" or "hour_of_day".
 
 #### date