From dd21dc03fdd3cb1b8d40f40702f94975d60abe12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 22 Oct 2024 13:45:05 +0200 Subject: [PATCH 1/7] [DOCS] Initial docs for stream inference API. --- docs/reference/inference/inference-apis.asciidoc | 2 ++ docs/reference/inference/stream-inference.asciidoc | 0 2 files changed, 2 insertions(+) create mode 100644 docs/reference/inference/stream-inference.asciidoc diff --git a/docs/reference/inference/inference-apis.asciidoc b/docs/reference/inference/inference-apis.asciidoc index b291b464be498..c8550912f313e 100644 --- a/docs/reference/inference/inference-apis.asciidoc +++ b/docs/reference/inference/inference-apis.asciidoc @@ -19,6 +19,7 @@ the following APIs to manage {infer} models and perform {infer}: * <> * <> * <> +* <> * <> [[inference-landscape]] @@ -38,6 +39,7 @@ include::delete-inference.asciidoc[] include::get-inference.asciidoc[] include::post-inference.asciidoc[] include::put-inference.asciidoc[] +include::stream-inference.asciidoc[] include::update-inference.asciidoc[] include::service-alibabacloud-ai-search.asciidoc[] include::service-amazon-bedrock.asciidoc[] diff --git a/docs/reference/inference/stream-inference.asciidoc b/docs/reference/inference/stream-inference.asciidoc new file mode 100644 index 0000000000000..e69de29bb2d1d From f23153f58808c1e56021064e2a8dc447b2d7ba58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 22 Oct 2024 15:16:58 +0200 Subject: [PATCH 2/7] [DOCS] Modifies stream docs. 
--- .../inference/stream-inference.asciidoc | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/docs/reference/inference/stream-inference.asciidoc b/docs/reference/inference/stream-inference.asciidoc index e69de29bb2d1d..a36a16ccc5181 100644 --- a/docs/reference/inference/stream-inference.asciidoc +++ b/docs/reference/inference/stream-inference.asciidoc @@ -0,0 +1,103 @@ +[role="xpack"] +[[stream-inference-api]] +=== Stream inference API + +Streams a chat completion response. + +IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in {ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. +For built-in models and models uploaded through Eland, the {infer} APIs offer an alternative way to use and manage trained models. +However, if you do not plan to use the {infer} APIs to use these models or if you want to use non-NLP models, use the <>. + + +[discrete] +[[stream-inference-api-request]] +==== {api-request-title} + +`POST /_inference//_stream` + +`POST /_inference///_stream` + + +[discrete] +[[stream-inference-api-prereqs]] +==== {api-prereq-title} + +* Requires the `monitor_inference` <> +(the built-in `inference_admin` and `inference_user` roles grant this privilege) + +[discrete] +[[stream-inference-api-desc]] +==== {api-description-title} + +The stream {infer} API enables real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation. +It only works with the `completion` task type. + + +[discrete] +[[stream-inference-api-path-params]] +==== {api-path-parms-title} + +``:: +(Required, string) +The unique identifier of the {infer} endpoint. + + +``:: +(Optional, string) +The type of {infer} task that the model performs. 
+ + +[discrete] +[[stream-inference-api-request-body]] +==== {api-request-body-title} + +`input`:: +(Required, string or array of strings) +The text on which you want to perform the {infer} task. +`input` can be a single string or an array. ++ +-- +[NOTE] +==== +Inference endpoints for the `completion` task type currently only support a +single string as input. +==== +-- + + +[discrete] +[[post-inference-api-example]] +==== {api-examples-title} + + +[discrete] +[[inference-example-stream]] +===== Completion example + +The following example performs a completion on the example question. + + +[source,console] +------------------------------------------------------------ +POST _inference/completion/openai_chat_completions +{ + "input": "What is Elastic?" +} +------------------------------------------------------------ +// TEST[skip:TBD] + + +The API returns the following response: + + +[source,console-result] +------------------------------------------------------------ +{ + "completion": [ + { + "result": "Elastic is a company that provides a range of software solutions for search, logging, security, and analytics. Their flagship product is Elasticsearch, an open-source, distributed search engine that allows users to search, analyze, and visualize large volumes of data in real-time. Elastic also offers products such as Kibana, a data visualization tool, and Logstash, a log management and pipeline tool, as well as various other tools and solutions for data analysis and management." + } + ] +} +------------------------------------------------------------ +// NOTCONSOLE From c680921354ab6eed9ea4aa1ed2c78ef0434dc73c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 22 Oct 2024 17:00:16 +0200 Subject: [PATCH 3/7] [DOCS] Gives a streaming example. 
--- .../inference/stream-inference.asciidoc | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/docs/reference/inference/stream-inference.asciidoc b/docs/reference/inference/stream-inference.asciidoc index a36a16ccc5181..2884947bc7686 100644 --- a/docs/reference/inference/stream-inference.asciidoc +++ b/docs/reference/inference/stream-inference.asciidoc @@ -24,6 +24,7 @@ However, if you do not plan to use the {infer} APIs to use these models or if yo * Requires the `monitor_inference` <> (the built-in `inference_admin` and `inference_user` roles grant this privilege) +* You must use a client that supports streaming. [discrete] [[stream-inference-api-desc]] @@ -79,7 +80,7 @@ The following example performs a completion on the example question. [source,console] ------------------------------------------------------------ -POST _inference/completion/openai_chat_completions +POST _inference/completion/openai-completion/_stream { "input": "What is Elastic?" } @@ -92,12 +93,31 @@ The API returns the following response: [source,console-result] ------------------------------------------------------------ -{ - "completion": [ +event: message +data: { + "completion":[{ + "delta":"Elastic" + }] +} +event: message +data: { + "completion":[{ + "delta":" is" + }, { - "result": "Elastic is a company that provides a range of software solutions for search, logging, security, and analytics. Their flagship product is Elasticsearch, an open-source, distributed search engine that allows users to search, analyze, and visualize large volumes of data in real-time. Elastic also offers products such as Kibana, a data visualization tool, and Logstash, a log management and pipeline tool, as well as various other tools and solutions for data analysis and management." + "delta":" a" } ] } +event: message +data: { + "completion":[{ + "delta":" software" + }, + { + "delta":" company" + }] +} +(...) 
------------------------------------------------------------ // NOTCONSOLE From f59f377a2b2a2fc2d7a64f2619989d845a6a610b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 22 Oct 2024 17:30:33 +0200 Subject: [PATCH 4/7] [DOCS] Updates section ID. --- docs/reference/inference/stream-inference.asciidoc | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/docs/reference/inference/stream-inference.asciidoc b/docs/reference/inference/stream-inference.asciidoc index 2884947bc7686..4da1588a9777b 100644 --- a/docs/reference/inference/stream-inference.asciidoc +++ b/docs/reference/inference/stream-inference.asciidoc @@ -67,15 +67,10 @@ single string as input. [discrete] -[[post-inference-api-example]] +[[stream-inference-api-example]] ==== {api-examples-title} - -[discrete] -[[inference-example-stream]] -===== Completion example - -The following example performs a completion on the example question. +The following example performs a completion on the example question with streaming. [source,console] From bb3cfafcac86552ae8ca34474fe42b78e7646d0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 22 Oct 2024 18:09:04 +0200 Subject: [PATCH 5/7] [DOCS] Changes snippet type. 
--- docs/reference/inference/stream-inference.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/inference/stream-inference.asciidoc b/docs/reference/inference/stream-inference.asciidoc index 4da1588a9777b..65b8c4c399030 100644 --- a/docs/reference/inference/stream-inference.asciidoc +++ b/docs/reference/inference/stream-inference.asciidoc @@ -86,7 +86,7 @@ POST _inference/completion/openai-completion/_stream The API returns the following response: -[source,console-result] +[source,txt] ------------------------------------------------------------ event: message data: { From ee36692314e7c25a8c2cf4486208d2493dd4956f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Thu, 24 Oct 2024 11:21:37 +0200 Subject: [PATCH 6/7] Update docs/reference/inference/stream-inference.asciidoc --- docs/reference/inference/stream-inference.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/reference/inference/stream-inference.asciidoc b/docs/reference/inference/stream-inference.asciidoc index 65b8c4c399030..800d4f3e79cc6 100644 --- a/docs/reference/inference/stream-inference.asciidoc +++ b/docs/reference/inference/stream-inference.asciidoc @@ -26,6 +26,7 @@ However, if you do not plan to use the {infer} APIs to use these models or if yo (the built-in `inference_admin` and `inference_user` roles grant this privilege) * You must use a client that supports streaming. 
+ [discrete] [[stream-inference-api-desc]] ==== {api-description-title} From 7195c645342754bb20681272e539f35a240d5c53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Fri, 25 Oct 2024 08:46:41 +0200 Subject: [PATCH 7/7] Update docs/reference/inference/stream-inference.asciidoc Co-authored-by: Pat Whelan --- docs/reference/inference/stream-inference.asciidoc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/reference/inference/stream-inference.asciidoc b/docs/reference/inference/stream-inference.asciidoc index 800d4f3e79cc6..e66acd630cb3e 100644 --- a/docs/reference/inference/stream-inference.asciidoc +++ b/docs/reference/inference/stream-inference.asciidoc @@ -95,6 +95,7 @@ data: { "delta":"Elastic" }] } + event: message data: { "completion":[{ @@ -105,6 +106,7 @@ data: { } ] } + event: message data: { "completion":[{ @@ -114,6 +116,7 @@ data: { "delta":" company" }] } + (...) ------------------------------------------------------------ // NOTCONSOLE
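As a footnote to the series above: the example response in these patches streams the answer as server-sent events, each carrying a `completion` array of `delta` fragments that the client concatenates in order. The sketch below is a hypothetical client-side helper, not part of the patches themselves; it assumes only the event framing shown in the documented example (a `data:` line, possibly pretty-printed across several lines, terminated by a blank line) and uses nothing beyond the Python standard library.

```python
import json

def join_completion_deltas(sse_text: str) -> str:
    """Reassemble a completion from the server-sent events emitted by
    POST _inference/<task_type>/<inference_id>/_stream.

    Each event carries a JSON body (possibly pretty-printed across
    several lines) whose `completion` array holds `delta` fragments;
    concatenating the fragments in order yields the full answer.
    """
    parts = []
    buf = None  # lines of the JSON body currently being collected
    for line in sse_text.splitlines():
        stripped = line.strip()
        if stripped.startswith("data:"):
            buf = [stripped[len("data:"):].strip()]
        elif buf is not None and stripped:
            buf.append(stripped)  # continuation of a pretty-printed body
        elif buf is not None:
            # blank line ends the event: parse it and collect the deltas
            payload = json.loads("".join(buf))
            buf = None
            parts.extend(c.get("delta", "") for c in payload.get("completion", []))
    if buf:  # stream ended without a trailing blank line
        payload = json.loads("".join(buf))
        parts.extend(c.get("delta", "") for c in payload.get("completion", []))
    return "".join(parts)


# Feeding it the first two events from the documented example response:
sample = (
    "event: message\n"
    'data: {"completion":[{"delta":"Elastic"}]}\n'
    "\n"
    "event: message\n"
    'data: {"completion":[{"delta":" is"},{"delta":" a"}]}\n'
)
print(join_completion_deltas(sample))  # Elastic is a
```

This mirrors why the docs require "a client that supports streaming": the full answer only exists after the deltas are stitched back together in arrival order.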