Commit 2ceaa36

Merge branch 'bbq_hnsw-disk-rescoring' into bbq_hnsw-bfloat16
2 parents: feb7aee + 878fa71

573 files changed: +12621 additions, -5837 deletions


.buildkite/scripts/generate-pr-performance-benchmark.sh

Lines changed: 2 additions & 0 deletions
@@ -46,6 +46,7 @@ steps:
         CONFIGURATION_NAME: ${GITHUB_PR_COMMENT_VAR_BENCHMARK}
         ENV_ID: ${env_id_baseline}
         REVISION: ${merge_base}
+        BENCHMARK_TYPE: baseline
   - label: Trigger contender benchmark with ${GITHUB_PR_TRIGGERED_SHA:0:7}
     trigger: elasticsearch-performance-esbench-pr
     build:
@@ -56,6 +57,7 @@ steps:
         ENV_ID: ${env_id_contender}
         ES_REPO_URL: https://github.com/${GITHUB_PR_OWNER}/${GITHUB_PR_REPO}.git
         REVISION: ${GITHUB_PR_TRIGGERED_SHA}
+        BENCHMARK_TYPE: contender
   - wait: ~
   - label: Update PR comment and Buildkite annotation
     command: |

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -46,6 +46,7 @@ build/
 **/.local*
 .vagrant/
 /logs/
+**/target/
 
 # osx stuff
 .DS_Store

benchmarks/src/main/java/org/elasticsearch/benchmark/bytes/RecyclerBytesStreamOutputBenchmark.java

Lines changed: 5 additions & 47 deletions
@@ -10,6 +10,7 @@
 package org.elasticsearch.benchmark.bytes;
 
 import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.benchmark.common.util.UTF8StringBytesBenchmark;
 import org.elasticsearch.common.io.stream.RecyclerBytesStreamOutput;
 import org.elasticsearch.common.recycler.Recycler;
 import org.openjdk.jmh.annotations.Benchmark;
@@ -65,10 +66,10 @@ public void initResults() throws IOException {
         // We use weights to generate certain sized UTF-8 characters and vInts. However, there is still some non-determinism which could
         // impact direct comparisons run-to-run
 
-        shortString = generateAsciiString(20);
-        longString = generateAsciiString(100);
-        nonAsciiString = generateUtf8String(200);
-        veryLongString = generateAsciiString(800);
+        shortString = UTF8StringBytesBenchmark.generateAsciiString(20);
+        longString = UTF8StringBytesBenchmark.generateAsciiString(100);
+        nonAsciiString = UTF8StringBytesBenchmark.generateUTF8String(200);
+        veryLongString = UTF8StringBytesBenchmark.generateAsciiString(800);
         // vint values for benchmarking
         vints = new int[1000];
         for (int i = 0; i < vints.length; i++) {
@@ -143,49 +144,6 @@ public void writeVInt() throws IOException {
         }
     }
 
-    public static String generateAsciiString(int n) {
-        ThreadLocalRandom random = ThreadLocalRandom.current();
-        StringBuilder sb = new StringBuilder(n);
-
-        for (int i = 0; i < n; i++) {
-            int ascii = random.nextInt(128);
-            sb.append((char) ascii);
-        }
-
-        return sb.toString();
-    }
-
-    public static String generateUtf8String(int n) {
-        ThreadLocalRandom random = ThreadLocalRandom.current();
-        StringBuilder sb = new StringBuilder(n);
-
-        for (int i = 0; i < n; i++) {
-            int codePoint;
-            int probability = random.nextInt(100);
-
-            if (probability < 85) {
-                // 1-byte UTF-8 (ASCII range)
-                // 0x0000 to 0x007F
-                codePoint = random.nextInt(0x0080);
-            } else if (probability < 95) {
-                // 2-byte UTF-8
-                // 0x0080 to 0x07FF
-                codePoint = random.nextInt(0x0080, 0x0800);
-            } else {
-                // 3-byte UTF-8
-                // 0x0800 to 0xFFFF
-                do {
-                    codePoint = random.nextInt(0x0800, 0x10000);
-                    // Skip surrogate pairs (0xD800-0xDFFF)
-                } while (codePoint >= 0xD800 && codePoint <= 0xDFFF);
-            }
-
-            sb.appendCodePoint(codePoint);
-        }
-
-        return sb.toString();
-    }
-
     private record BenchmarkRecycler(AtomicReference<BytesRef> bytesRef) implements Recycler<BytesRef> {
 
         @Override

benchmarks/src/main/java/org/elasticsearch/benchmark/common/util/UTF8StringBytesBenchmark.java

Lines changed: 148 additions & 0 deletions
@@ -0,0 +1,148 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.benchmark.common.util;
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.UnicodeUtil;
+import org.elasticsearch.common.UUIDs;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.TimeUnit;
+
+@Warmup(iterations = 3)
+@Measurement(iterations = 3)
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@Fork(value = 1)
+public class UTF8StringBytesBenchmark {
+
+    @State(Scope.Thread)
+    public static class StringState {
+        @Param({ "uuid", "short", "long", "nonAscii", "veryLong" })
+        String stringType;
+
+        String string;
+        BytesRef bytes;
+
+        @Setup
+        public void setup() {
+            string = switch (stringType) {
+                case "uuid" -> UUIDs.base64UUID();
+                case "short" -> generateAsciiString(20);
+                case "long" -> generateAsciiString(100);
+                case "nonAscii" -> generateUTF8String(200);
+                case "veryLong" -> generateAsciiString(1000);
+                default -> throw new IllegalArgumentException("Unknown stringType: " + stringType);
+            };
+            bytes = getBytes(string);
+        }
+    }
+
+    @Benchmark
+    public BytesRef getBytesJDK(StringState state) {
+        byte[] bytes = state.string.getBytes(StandardCharsets.UTF_8);
+        return new BytesRef(bytes, 0, bytes.length);
+    }
+
+    @Benchmark
+    public BytesRef getBytesUnicodeUtils(StringState state) {
+        String string = state.string;
+        int length = string.length();
+        int size = UnicodeUtil.calcUTF16toUTF8Length(string, 0, length);
+        byte[] out = new byte[size];
+        UnicodeUtil.UTF16toUTF8(string, 0, length, out, 0);
+        return new BytesRef(out, 0, out.length);
+    }
+
+    @Benchmark
+    public BytesRef getBytesByteBufferEncoder(StringState state) {
+        var byteBuff = StandardCharsets.UTF_8.encode(state.string);
+        assert byteBuff.hasArray();
+        return new BytesRef(byteBuff.array(), byteBuff.arrayOffset() + byteBuff.position(), byteBuff.remaining());
+    }
+
+    @Benchmark
+    public String getStringJDK(StringState state) {
+        BytesRef bytes = state.bytes;
+        return new String(bytes.bytes, bytes.offset, bytes.length, StandardCharsets.UTF_8);
+    }
+
+    @Benchmark
+    public String getStringByteBufferDecoder(StringState state) {
+        BytesRef bytes = state.bytes;
+        var byteBuff = ByteBuffer.wrap(bytes.bytes, bytes.offset, bytes.length);
+        return StandardCharsets.UTF_8.decode(byteBuff).toString();
+    }
+
+    private static BytesRef getBytes(String string) {
+        int before = ThreadLocalRandom.current().nextInt(0, 50);
+        int after = ThreadLocalRandom.current().nextInt(0, 50);
+        byte[] stringBytes = string.getBytes(StandardCharsets.UTF_8);
+        byte[] finalBytes = new byte[before + after + stringBytes.length];
+        System.arraycopy(stringBytes, 0, finalBytes, before, stringBytes.length);
+        return new BytesRef(finalBytes, before, stringBytes.length);
+    }
+
+    public static String generateAsciiString(int n) {
+        ThreadLocalRandom random = ThreadLocalRandom.current();
+        StringBuilder sb = new StringBuilder(n);
+
+        for (int i = 0; i < n; i++) {
+            int ascii = random.nextInt(128);
+            sb.append((char) ascii);
+        }
+
+        return sb.toString();
+    }
+
+    public static String generateUTF8String(int n) {
+        ThreadLocalRandom random = ThreadLocalRandom.current();
+        StringBuilder sb = new StringBuilder(n);
+
+        for (int i = 0; i < n; i++) {
+            int codePoint;
+            int probability = random.nextInt(100);
+
+            if (probability < 85) {
+                // 1-byte UTF-8 (ASCII range)
+                // 0x0000 to 0x007F
+                codePoint = random.nextInt(0x0080);
+            } else if (probability < 95) {
+                // 2-byte UTF-8
+                // 0x0080 to 0x07FF
+                codePoint = random.nextInt(0x0080, 0x0800);
+            } else {
+                // 3-byte UTF-8
+                // 0x0800 to 0xFFFF
+                do {
+                    codePoint = random.nextInt(0x0800, 0x10000);
+                    // Skip surrogate pairs (0xD800-0xDFFF)
+                } while (codePoint >= 0xD800 && codePoint <= 0xDFFF);
+            }
+
+            sb.appendCodePoint(codePoint);
+        }
+
+        return sb.toString();
+    }
+}

build-tools/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchNode.java

Lines changed: 1 addition & 1 deletion
@@ -1407,7 +1407,7 @@ private void createConfiguration() {
         // Limit the number of allocated processors for all nodes in the cluster by default.
         // This is to ensure that the tests run consistently across different environments.
         String processorCount = shouldConfigureTestClustersWithOneProcessor() ? "1" : "2";
-        if (getVersion().onOrAfter("7.6.0")) {
+        if (getVersion().onOrAfter("7.4.0")) {
             baseConfig.put("node.processors", processorCount);
         } else {
            baseConfig.put("processors", processorCount);

docs/changelog/135886.yaml

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+pr: 135886
+summary: Provide defaults for index sort settings
+area: Mapping
+type: bug
+issues:
+ - 129062

docs/changelog/136066.yaml

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+pr: 136066
+summary: Simulate shards moved by explicit commands
+area: Allocation
+type: enhancement
+issues: []

docs/changelog/136119.yaml

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+pr: 136119
+summary: Fix logsdb settings provider mapping filters
+area: Logs
+type: bug
+issues:
+ - 136107

docs/reference/elasticsearch/mapping-reference/semantic-text.md

Lines changed: 25 additions & 0 deletions
@@ -611,6 +611,31 @@ PUT test-index
 }
 ```
 
+## Querying `semantic_text` fields [querying-semantic-text-fields]
+
+You can query `semantic_text` fields using the following query types:
+
+- Match query: The recommended method for querying `semantic_text` fields. You can use [Query DSL](/reference/query-languages/query-dsl/query-dsl-match-query.md) or [ES|QL](/reference/query-languages/esql/functions-operators/search-functions.md#esql-match) syntax.
+  <!--
+  Refer to examples of match queries on `semantic_text` fields.
+  -->
+
+- [kNN query](/reference/query-languages/query-dsl/query-dsl-knn-query.md): Finds the nearest vectors to a query vector using a similarity metric, mainly for advanced or combined search use cases.
+  <!--
+  Refer to examples of kNN queries on `semantic_text` fields.
+  -->
+
+- [Sparse vector query](/reference/query-languages/query-dsl/query-dsl-sparse-vector-query.md): Executes searches using sparse vectors generated by a sparse retrieval model such as [ELSER](docs-content://explore-analyze/machine-learning/nlp/ml-nlp-elser.md).
+  <!--
+  Refer to examples of sparse vector queries on `semantic_text` fields.
+  -->
+
+- [Semantic query](/reference/query-languages/query-dsl/query-dsl-semantic-query.md): We don't recommend this legacy query type for _new_ projects, because the alternatives in this list enable more flexibility and customization. The `semantic` query remains available to support existing implementations.
+  <!--
+  Refer to examples of semantic queries on `semantic_text` fields.
+  -->
+
+
 ## Troubleshooting semantic_text fields [troubleshooting-semantic-text-fields]
 
 If you want to verify that your embeddings look correct, you can view the
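
Note: the new querying section above recommends the match query as the primary way to search `semantic_text` fields. As a minimal sketch of that usage (the index name `my-index` and field name `my_semantic_field` are hypothetical, not part of this commit), a Query DSL match query against a `semantic_text` field can look like:

```
GET my-index/_search
{
  "query": {
    "match": {
      "my_semantic_field": "Which query types work with semantic_text fields?"
    }
  }
}
```

The field's associated inference endpoint generates the query embedding at search time, so the request only needs the plain query text.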

docs/reference/elasticsearch/mapping-reference/sparse-vector.md

Lines changed: 2 additions & 2 deletions
@@ -95,13 +95,13 @@ This ensures that:
 * The tokens that are kept are frequent enough and have significant scoring.
 * Very infrequent tokens that may not have as high of a score are removed.
 
-## Accessing `dense_vector` fields in search responses
+## Accessing `sparse_vector` fields in search responses
 ```{applies_to}
 stack: ga 9.2
 serverless: ga
 ```
 
-By default, `dense_vector` fields are **not included in `_source`** in responses from the `_search`, `_msearch`, `_get`, and `_mget` APIs.
+By default, `sparse_vector` fields are **not included in `_source`** in responses from the `_search`, `_msearch`, `_get`, and `_mget` APIs.
 This helps reduce response size and improve performance, especially in scenarios where vectors are used solely for similarity scoring and not required in the output.
 
 To retrieve vector values explicitly, you can use:
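
Note: the hunk ends before the list of retrieval options. As an illustration only (index and field names are hypothetical, and the exact options listed in the doc are not shown in this diff), the `fields` option can request a `sparse_vector` field that is excluded from `_source` by default:

```
GET my-index/_search
{
  "query": { "match_all": {} },
  "fields": ["my_sparse_vector_field"]
}
```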
