elastic
diff --git a/‎build-tools-internal/src/main/groovy/elasticsearch.ide.gradle‎
Lines changed: 34 additions & 1 deletion b/‎build-tools-internal/src/main/groovy/elasticsearch.ide.gradle‎
Lines changed: 34 additions & 1 deletion
diff --git a/‎build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/InternalDistributionDownloadPlugin.java‎
Lines changed: 0 additions & 3 deletions b/‎build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/InternalDistributionDownloadPlugin.java‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/distribution/DockerCloudElasticsearchDistributionType.java‎
Lines changed: 0 additions & 27 deletions b/‎build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/distribution/DockerCloudElasticsearchDistributionType.java‎
Lines changed: 0 additions & 27 deletions
diff --git a/‎build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/distribution/InternalElasticsearchDistributionTypes.java‎
Lines changed: 0 additions & 2 deletions b/‎build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/distribution/InternalElasticsearchDistributionTypes.java‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/test/DistroTestPlugin.java‎
Lines changed: 0 additions & 2 deletions b/‎build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/test/DistroTestPlugin.java‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎docs/changelog/113563.yaml‎
Lines changed: 0 additions & 5 deletions b/‎docs/changelog/113563.yaml‎
Lines changed: 0 additions & 5 deletions
diff --git a/‎docs/reference/connector/docs/connectors-API-tutorial.asciidoc‎
Lines changed: 1 addition & 1 deletion b/‎docs/reference/connector/docs/connectors-API-tutorial.asciidoc‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/reference/inference/inference-apis.asciidoc‎
Lines changed: 61 additions & 1 deletion b/‎docs/reference/inference/inference-apis.asciidoc‎
Lines changed: 61 additions & 1 deletion
diff --git a/‎docs/reference/inference/inference-shared.asciidoc‎
Lines changed: 33 additions & 1 deletion b/‎docs/reference/inference/inference-shared.asciidoc‎
Lines changed: 33 additions & 1 deletion
diff --git a/‎docs/reference/inference/service-alibabacloud-ai-search.asciidoc‎
Lines changed: 20 additions & 1 deletion b/‎docs/reference/inference/service-alibabacloud-ai-search.asciidoc‎
Lines changed: 20 additions & 1 deletion
@@ -122,6 +122,36 @@ if (providers.systemProperty('idea.active').getOrNull() == 'true') {
       .findAll { it != null }
   }
 
+  // force IntelliJ to generate *.iml files for each imported module
+  tasks.register("enableExternalConfiguration") {
+    group = 'ide'
+    description = 'Enable per-module *.iml files'
+
+    doLast {
+      modifyXml('.idea/misc.xml') {xml ->
+        def externalStorageConfig = xml.component.find { it.'@name' == 'ExternalStorageConfigurationManager' }
+        if (externalStorageConfig) {
+          xml.remove(externalStorageConfig)
+        }
+      }
+    }
+  }
+
+  // modifies the idea module config to enable preview features on 'elasticsearch-native' module
+  tasks.register("enablePreviewFeatures") {
+    group = 'ide'
+    description = 'Enables preview features on native library module'
+    dependsOn tasks.named("enableExternalConfiguration")
+
+    doLast {
+      ['main', 'test'].each { sourceSet ->
+        modifyXml(".idea/modules/libs/native/elasticsearch.libs.elasticsearch-native.${sourceSet}.iml") { xml ->
+          xml.component.find { it.'@name' == 'NewModuleRootManager' }?.'@LANGUAGE_LEVEL' = 'JDK_21_PREVIEW'
+        }
+      }
+    }
+  }
+
   tasks.register('buildDependencyArtifacts') {
     group = 'ide'
     description = 'Builds artifacts needed as dependency for IDE modules'
@@ -149,7 +179,10 @@ if (providers.systemProperty('idea.active').getOrNull() == 'true') {
           testRunner = 'choose_per_test'
         }
         taskTriggers {
-          afterSync tasks.named('configureIdeCheckstyle'), tasks.named('configureIdeaGradleJvm'), tasks.named('buildDependencyArtifacts')
+          afterSync tasks.named('configureIdeCheckstyle'),
+            tasks.named('configureIdeaGradleJvm'),
+            tasks.named('buildDependencyArtifacts'),
+            tasks.named('enablePreviewFeatures')
         }
         encodings {
           encoding = 'UTF-8'
 
@@ -172,9 +172,6 @@ private static String distributionProjectName(ElasticsearchDistribution distribu
         if (distribution.getType() == InternalElasticsearchDistributionTypes.DOCKER_IRONBANK) {
             return projectName + "ironbank-docker" + archString + "-export";
         }
-        if (distribution.getType() == InternalElasticsearchDistributionTypes.DOCKER_CLOUD) {
-            return projectName + "cloud-docker" + archString + "-export";
-        }
         if (distribution.getType() == InternalElasticsearchDistributionTypes.DOCKER_CLOUD_ESS) {
             return projectName + "cloud-ess-docker" + archString + "-export";
         }
 
@@ -19,7 +19,6 @@ public class InternalElasticsearchDistributionTypes {
     public static ElasticsearchDistributionType DOCKER = new DockerElasticsearchDistributionType();
     public static ElasticsearchDistributionType DOCKER_UBI = new DockerUbiElasticsearchDistributionType();
     public static ElasticsearchDistributionType DOCKER_IRONBANK = new DockerIronBankElasticsearchDistributionType();
-    public static ElasticsearchDistributionType DOCKER_CLOUD = new DockerCloudElasticsearchDistributionType();
     public static ElasticsearchDistributionType DOCKER_CLOUD_ESS = new DockerCloudEssElasticsearchDistributionType();
     public static ElasticsearchDistributionType DOCKER_WOLFI = new DockerWolfiElasticsearchDistributionType();
 
@@ -29,7 +28,6 @@ public class InternalElasticsearchDistributionTypes {
         DOCKER,
         DOCKER_UBI,
         DOCKER_IRONBANK,
-        DOCKER_CLOUD,
         DOCKER_CLOUD_ESS,
         DOCKER_WOLFI
     );
 
@@ -49,7 +49,6 @@
 import static org.elasticsearch.gradle.internal.distribution.InternalElasticsearchDistributionTypes.ALL_INTERNAL;
 import static org.elasticsearch.gradle.internal.distribution.InternalElasticsearchDistributionTypes.DEB;
 import static org.elasticsearch.gradle.internal.distribution.InternalElasticsearchDistributionTypes.DOCKER;
-import static org.elasticsearch.gradle.internal.distribution.InternalElasticsearchDistributionTypes.DOCKER_CLOUD;
 import static org.elasticsearch.gradle.internal.distribution.InternalElasticsearchDistributionTypes.DOCKER_CLOUD_ESS;
 import static org.elasticsearch.gradle.internal.distribution.InternalElasticsearchDistributionTypes.DOCKER_IRONBANK;
 import static org.elasticsearch.gradle.internal.distribution.InternalElasticsearchDistributionTypes.DOCKER_UBI;
@@ -149,7 +148,6 @@ private static Map<ElasticsearchDistributionType, TaskProvider<?>> lifecycleTask
         lifecyleTasks.put(DOCKER, project.getTasks().register(taskPrefix + ".docker"));
         lifecyleTasks.put(DOCKER_UBI, project.getTasks().register(taskPrefix + ".docker-ubi"));
         lifecyleTasks.put(DOCKER_IRONBANK, project.getTasks().register(taskPrefix + ".docker-ironbank"));
-        lifecyleTasks.put(DOCKER_CLOUD, project.getTasks().register(taskPrefix + ".docker-cloud"));
         lifecyleTasks.put(DOCKER_CLOUD_ESS, project.getTasks().register(taskPrefix + ".docker-cloud-ess"));
         lifecyleTasks.put(DOCKER_WOLFI, project.getTasks().register(taskPrefix + ".docker-wolfi"));
         lifecyleTasks.put(ARCHIVE, project.getTasks().register(taskPrefix + ".archives"));
 
@@ -367,7 +367,7 @@ Refer to the individual connectors-references,connector references for these con
 ====
 We're using a self-managed connector in this tutorial.
 To use these APIs with an Elastic managed connector, there's some extra setup for API keys.
-Refer to native-connectors-manage-API-keys for details.
+Refer to <<es-native-connectors-manage-API-keys>> for details.
 ====
 
 We're now ready to sync our PostgreSQL data to {es}.
 
@@ -35,7 +35,6 @@ Elastic –, then create an {infer} endpoint by the <<put-inference-api>>.
 Now use <<semantic-search-semantic-text, semantic text>> to perform
 <<semantic-search, semantic search>> on your data.
 
-
 [discrete]
 [[default-enpoints]]
 === Default {infer} endpoints
@@ -53,6 +52,67 @@ For these models, the minimum number of allocations is `0`.
 If there is no {infer} activity that uses the endpoint, the number of allocations will scale down to `0` automatically after 15 minutes.
 
 
+[discrete]
+[[infer-chunking-config]]
+=== Configuring chunking
+
+{infer-cap} endpoints have a limit on the amount of text they can process at once, determined by the model's input capacity.
+Chunking is the process of splitting the input text into pieces that remain within these limits.
+It occurs when ingesting documents into <<semantic-text,`semantic_text` fields>>.
+Chunking also helps produce sections that are digestible for humans.
+Returning a long document in search results is less useful than providing the most relevant chunk of text.
+
+Each chunk will include the text subpassage and the corresponding embedding generated from it.
+
+By default, documents are split into sentences and grouped in sections up to 250 words with 1 sentence overlap so that each chunk shares a sentence with the previous chunk.
+Overlapping ensures continuity and prevents vital contextual information in the input text from being lost by a hard break. 
+
+{es} uses the https://unicode-org.github.io/icu-docs/[ICU4J] library to detect word and sentence boundaries for chunking.
+https://unicode-org.github.io/icu/userguide/boundaryanalysis/#word-boundary[Word boundaries] are identified by following a series of rules, not just the presence of a whitespace character.
+For written languages that do not use whitespace, such as Chinese or Japanese, dictionary lookups are used to detect word boundaries.
+
+
+[discrete]
+==== Chunking strategies
+
+Two strategies are available for chunking: `sentence` and `word`.
+
+The `sentence` strategy splits the input text at sentence boundaries.
+Each chunk contains one or more complete sentences ensuring that the integrity of sentence-level context is preserved, except when a sentence causes a chunk to exceed a word count of `max_chunk_size`, in which case it will be split across chunks.
+The `sentence_overlap` option defines the number of sentences from the previous chunk to include in the current chunk, which is either `0` or `1`.
+
+The `word` strategy splits the input text on individual words up to the `max_chunk_size` limit.
+The `overlap` option is the number of words from the previous chunk to include in the current chunk.
+
+The default chunking strategy is `sentence`.
+
+NOTE: The default chunking strategy for {infer} endpoints created before 8.16 is `word`.
+
+
+[discrete]
+==== Example of configuring the chunking behavior
+
+The following example creates an {infer} endpoint with the `elasticsearch` service that deploys the ELSER model by default and configures the chunking behavior.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/sparse_embedding/small_chunk_size
+{
+  "service": "elasticsearch",
+  "service_settings": {
+    "num_allocations": 1,
+    "num_threads": 1
+  },
+  "chunking_settings": {
+    "strategy": "sentence",
+    "max_chunk_size": 100,
+    "sentence_overlap": 0
+  }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+
+
 include::delete-inference.asciidoc[]
 include::get-inference.asciidoc[]
 include::post-inference.asciidoc[]
 
@@ -31,4 +31,36 @@ end::task-settings[]
 
 tag::task-type[]
 The type of the {infer} task that the model will perform.
-end::task-type[]
+end::task-type[]
+
+tag::chunking-settings[]
+Chunking configuration object.
+Refer to <<infer-chunking-config>> to learn more about chunking.
+end::chunking-settings[]
+
+tag::chunking-settings-max-chunking-size[]
+Specifies the maximum size of a chunk in words.
+Defaults to `250`.
+This value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy). 
+end::chunking-settings-max-chunking-size[]
+
+tag::chunking-settings-overlap[]
+Only for `word` chunking strategy.
+Specifies the number of overlapping words for chunks.
+Defaults to `100`.
+This value cannot be higher than half of `max_chunk_size`.
+end::chunking-settings-overlap[]
+
+tag::chunking-settings-sentence-overlap[]
+Only for `sentence` chunking strategy.
+Specifies the number of overlapping sentences for chunks.
+It can be either `1` or `0`.
+Defaults to `1`.
+end::chunking-settings-sentence-overlap[]
+
+tag::chunking-settings-strategy[]
+Specifies the chunking strategy.
+It can be either `sentence` or `word`.
+end::chunking-settings-strategy[]
+
+
@@ -34,6 +34,26 @@ Available task types:
 [[infer-service-alibabacloud-ai-search-api-request-body]]
 ==== {api-request-body-title}
 
+`chunking_settings`::
+(Optional, object)
+include::inference-shared.asciidoc[tag=chunking-settings]
+
+`max_chunk_size`:::
+(Optional, integer)
+include::inference-shared.asciidoc[tag=chunking-settings-max-chunking-size]
+
+`overlap`:::
+(Optional, integer)
+include::inference-shared.asciidoc[tag=chunking-settings-overlap]
+
+`sentence_overlap`:::
+(Optional, integer)
+include::inference-shared.asciidoc[tag=chunking-settings-sentence-overlap]
+
+`strategy`:::
+(Optional, string)
+include::inference-shared.asciidoc[tag=chunking-settings-strategy]
+
 `service`::
 (Required, string) The type of service supported for the specified task type.
 In this case,
@@ -108,7 +128,6 @@ To modify this, set the `requests_per_minute` setting of this object in your ser
 include::inference-shared.asciidoc[tag=request-per-minute-example]
 --
 
-
 `task_settings`::
 (Optional, object)
 include::inference-shared.asciidoc[tag=task-settings]
Original file line number	Diff line number	Diff line change
`@@ -172,9 +172,6 @@ private static String distributionProjectName(ElasticsearchDistribution distribu`
`172`	`172`	`if (distribution.getType() == InternalElasticsearchDistributionTypes.DOCKER_IRONBANK) {`
`173`	`173`	`return projectName + "ironbank-docker" + archString + "-export";`
`174`	`174`	`}`
`175`		`- if (distribution.getType() == InternalElasticsearchDistributionTypes.DOCKER_CLOUD) {`
`176`		`- return projectName + "cloud-docker" + archString + "-export";`
`177`		`- }`
`178`	`175`	`if (distribution.getType() == InternalElasticsearchDistributionTypes.DOCKER_CLOUD_ESS) {`
`179`	`176`	`return projectName + "cloud-ess-docker" + archString + "-export";`
`180`	`177`	`}`