davidkyle
diff --git a/‎docs/changelog/115594.yaml‎
Lines changed: 6 additions & 0 deletions b/‎docs/changelog/115594.yaml‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎docs/reference/inference/inference-apis.asciidoc‎
Lines changed: 2 additions & 0 deletions b/‎docs/reference/inference/inference-apis.asciidoc‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/reference/inference/stream-inference.asciidoc‎
Lines changed: 122 additions & 0 deletions b/‎docs/reference/inference/stream-inference.asciidoc‎
Lines changed: 122 additions & 0 deletions
diff --git a/‎muted-tests.yml‎
Lines changed: 0 additions & 2 deletions b/‎muted-tests.yml‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎qa/no-bootstrap-tests/src/test/java/org/elasticsearch/bootstrap/SpawnerNoBootstrapTests.java‎
Lines changed: 2 additions & 1 deletion b/‎qa/no-bootstrap-tests/src/test/java/org/elasticsearch/bootstrap/SpawnerNoBootstrapTests.java‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml‎
Lines changed: 7 additions & 0 deletions b/‎qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/10_basic.yml‎
Lines changed: 67 additions & 0 deletions b/‎rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/10_basic.yml‎
Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,6 @@
+pr: 115594
+summary: Update `BlobCacheBufferedIndexInput::readVLong` to correctly handle negative
+  long values
+area: Search
+type: bug
+issues: []
@@ -19,6 +19,7 @@ the following APIs to manage {infer} models and perform {infer}:
 * <<get-inference-api>>
 * <<post-inference-api>>
 * <<put-inference-api>>
+* <<stream-inference-api>>
 * <<update-inference-api>>
 
 [[inference-landscape]]
@@ -56,6 +57,7 @@ include::delete-inference.asciidoc[]
 include::get-inference.asciidoc[]
 include::post-inference.asciidoc[]
 include::put-inference.asciidoc[]
+include::stream-inference.asciidoc[]
 include::update-inference.asciidoc[]
 include::service-alibabacloud-ai-search.asciidoc[]
 include::service-amazon-bedrock.asciidoc[]
 
@@ -0,0 +1,122 @@
+[role="xpack"]
+[[stream-inference-api]]
+=== Stream inference API
+
+Streams a chat completion response.
+
+IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in {ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
+For built-in models and models uploaded through Eland, the {infer} APIs offer an alternative way to use and manage trained models.
+However, if you do not plan to use the {infer} APIs to use these models or if you want to use non-NLP models, use the <<ml-df-trained-models-apis>>.
+
+
+[discrete]
+[[stream-inference-api-request]]
+==== {api-request-title}
+
+`POST /_inference/<inference_id>/_stream`
+
+`POST /_inference/<task_type>/<inference_id>/_stream`
+
+
+[discrete]
+[[stream-inference-api-prereqs]]
+==== {api-prereq-title}
+
+* Requires the `monitor_inference` <<privileges-list-cluster,cluster privilege>>
+(the built-in `inference_admin` and `inference_user` roles grant this privilege).
+* You must use a client that supports streaming.
+
+
+[discrete]
+[[stream-inference-api-desc]]
+==== {api-description-title}
+
+The stream {infer} API enables real-time responses for completion tasks by delivering answers incrementally as they are generated, so clients can start consuming the output before the full completion is ready.
+It only works with the `completion` task type.
+
+
+[discrete]
+[[stream-inference-api-path-params]]
+==== {api-path-parms-title}
+
+`<inference_id>`::
+(Required, string)
+The unique identifier of the {infer} endpoint.
+
+
+`<task_type>`::
+(Optional, string)
+The type of {infer} task that the model performs.
+
+
+[discrete]
+[[stream-inference-api-request-body]]
+==== {api-request-body-title}
+
+`input`::
+(Required, string or array of strings)
+The text on which you want to perform the {infer} task.
+`input` can be a single string or an array.
++
+--
+[NOTE]
+====
+Inference endpoints for the `completion` task type currently only support a
+single string as input.
+====
+--
+
+
+[discrete]
+[[stream-inference-api-example]]
+==== {api-examples-title}
+
+The following example performs a completion on the example question with streaming.
+
+
+[source,console]
+------------------------------------------------------------
+POST _inference/completion/openai-completion/_stream
+{
+  "input": "What is Elastic?"
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+
+
+The API returns the following response:
+
+
+[source,txt]
+------------------------------------------------------------
+event: message
+data: {
+  "completion":[{
+    "delta":"Elastic"
+  }]
+}
+
+event: message
+data: {
+  "completion":[{
+    "delta":" is"
+  },
+  {
+    "delta":" a"
+  }]
+}
+
+event: message
+data: {
+  "completion":[{
+    "delta":" software"
+  },
+  {
+    "delta":" company"
+  }]
+}
+
+(...)
+------------------------------------------------------------
+// NOTCONSOLE
@@ -349,8 +349,6 @@ tests:
 - class: org.elasticsearch.xpack.security.operator.OperatorPrivilegesIT
   method: testEveryActionIsEitherOperatorOnlyOrNonOperator
   issue: https://github.com/elastic/elasticsearch/issues/102992
-- class: org.elasticsearch.bootstrap.SpawnerNoBootstrapTests
-  issue: https://github.com/elastic/elasticsearch/issues/114555
 - class: org.elasticsearch.test.rest.yaml.RcsCcsCommonYamlTestSuiteIT
   method: test {p0=search.vectors/42_knn_search_int4_flat/Vector similarity with filter only}
   issue: https://github.com/elastic/elasticsearch/issues/115475
 
@@ -24,6 +24,7 @@
 import org.elasticsearch.plugins.Platforms;
 import org.elasticsearch.plugins.PluginTestUtil;
 import org.elasticsearch.test.GraalVMThreadsFilter;
+import org.elasticsearch.test.JnaCleanerThreadsFilter;
 import org.elasticsearch.test.MockLog;
 
 import java.io.IOException;
@@ -50,7 +51,7 @@
  * that prevents the Spawner class from doing its job. Also needs to run in a separate JVM to other
  * tests that extend ESTestCase for the same reason.
  */
-@ThreadLeakFilters(filters = { GraalVMThreadsFilter.class })
+@ThreadLeakFilters(filters = { GraalVMThreadsFilter.class, JnaCleanerThreadsFilter.class })
 public class SpawnerNoBootstrapTests extends LuceneTestCase {
 
     private static final String CONTROLLER_SOURCE = """
 
@@ -1588,6 +1588,13 @@ setup:
       cluster_features: ["simulate.support.non.template.mapping"]
       reason: "ingest simulate support for indices with mappings that didn't come from templates added in 8.17"
 
+  # A global match-everything legacy template is added to the cluster sometimes (rarely). We have to get rid of this template if it exists
+  # because this test is making sure we get correct behavior when an index matches *no* template:
+  - do:
+      indices.delete_template:
+        name:   '*'
+        ignore: 404
+
  # First, make sure that validation fails before we create the index (since we are only defining the bar field but trying to index a value
  # for foo).
   - do:
 
@@ -149,3 +149,70 @@
       indices.exists_alias:
         name: logs_2022-12-31
   - is_true: ''
+
+---
+"Create lookup index":
+  - requires:
+      test_runner_features: [ capabilities, default_shards ]
+      capabilities:
+        - method: PUT
+          path: /{index}
+          capabilities: [ lookup_index_mode ]
+      reason: "Support for 'lookup' index mode capability required"
+  - do:
+      indices.create:
+        index: "test_lookup"
+        body:
+          settings:
+            index.mode: lookup
+
+  - do:
+      indices.get_settings:
+        index: test_lookup
+
+  - match: { test_lookup.settings.index.number_of_shards: "1"}
+  - match: { test_lookup.settings.index.auto_expand_replicas: "0-all"}
+
+---
+"Create lookup index with one shard":
+  - requires:
+      test_runner_features: [ capabilities, default_shards ]
+      capabilities:
+        - method: PUT
+          path: /{index}
+          capabilities: [ lookup_index_mode ]
+      reason: "Support for 'lookup' index mode capability required"
+  - do:
+      indices.create:
+        index: "test_lookup"
+        body:
+          settings:
+            index:
+              mode: lookup
+              number_of_shards: 1
+
+  - do:
+      indices.get_settings:
+        index: test_lookup
+
+  - match: { test_lookup.settings.index.number_of_shards: "1"}
+  - match: { test_lookup.settings.index.auto_expand_replicas: "0-all"}
+
+---
+"Create lookup index with two shards":
+  - requires:
+      test_runner_features: [ capabilities ]
+      capabilities:
+        - method: PUT
+          path: /{index}
+          capabilities: [ lookup_index_mode ]
+      reason: "Support for 'lookup' index mode capability required"
+  - do:
+      catch: /illegal_argument_exception/
+      indices.create:
+        index: test_lookup
+        body:
+          settings:
+            index.mode: lookup
+            index.number_of_shards: 2
+