julian-elastic
diff --git a/‎.buildkite/pull-requests.json‎
Lines changed: 13 additions & 0 deletions b/‎.buildkite/pull-requests.json‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎benchmarks/src/main/java/org/elasticsearch/benchmark/vector/PackAsBinaryBenchmark.java‎
Lines changed: 85 additions & 0 deletions b/‎benchmarks/src/main/java/org/elasticsearch/benchmark/vector/PackAsBinaryBenchmark.java‎
Lines changed: 85 additions & 0 deletions
diff --git a/‎benchmarks/src/main/java/org/elasticsearch/benchmark/vector/TransposeHalfByteBenchmark.java‎
Lines changed: 86 additions & 0 deletions b/‎benchmarks/src/main/java/org/elasticsearch/benchmark/vector/TransposeHalfByteBenchmark.java‎
Lines changed: 86 additions & 0 deletions
diff --git a/‎docs/changelog/131559.yaml‎
Lines changed: 10 additions & 0 deletions b/‎docs/changelog/131559.yaml‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎docs/changelog/132011.yaml‎
Lines changed: 5 additions & 0 deletions b/‎docs/changelog/132011.yaml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎docs/changelog/132064.yaml‎
Lines changed: 5 additions & 0 deletions b/‎docs/changelog/132064.yaml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎docs/changelog/132638.yaml‎
Lines changed: 5 additions & 0 deletions b/‎docs/changelog/132638.yaml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎docs/reference/elasticsearch/mapping-reference/semantic-text.md‎
Lines changed: 6 additions & 3 deletions b/‎docs/reference/elasticsearch/mapping-reference/semantic-text.md‎
Lines changed: 6 additions & 3 deletions
diff --git a/‎docs/reference/query-languages/esql/_snippets/commands/layout/lookup-join.md‎
Lines changed: 12 additions & 3 deletions b/‎docs/reference/query-languages/esql/_snippets/commands/layout/lookup-join.md‎
Lines changed: 12 additions & 3 deletions
diff --git a/‎docs/reference/query-languages/esql/esql-lookup-join.md‎
Lines changed: 8 additions & 5 deletions b/‎docs/reference/query-languages/esql/esql-lookup-join.md‎
Lines changed: 8 additions & 5 deletions
@@ -15,6 +15,19 @@
       "trigger_comment_regex": "(run\\W+elasticsearch-ci.+)|(^\\s*((buildkite|@elastic(search)?machine)\\s*)?test\\s+this(\\s+please)?)",
       "cancel_intermediate_builds": true,
       "cancel_intermediate_builds_on_comment": false
+    },
+    {
+      "enabled": true,
+      "pipeline_slug": "elasticsearch-performance-esbench-pr",
+      "allow_org_users": true,
+      "allowed_repo_permissions": [
+        "admin",
+        "write"
+      ],
+      "set_commit_status": false,
+      "build_on_commit": false,
+      "build_on_comment": true,
+      "trigger_comment_regex": "^(buildkite|@elastic(search)?machine) benchmark this with (?<benchmark>\\w+)( please)?$"
     }
   ]
 }
@@ -0,0 +1,85 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+package org.elasticsearch.benchmark.vector;
+
+import org.elasticsearch.common.logging.LogConfigurator;
+import org.elasticsearch.index.codec.vectors.BQVectorUtils;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+import java.io.IOException;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+@BenchmarkMode(Mode.Throughput) // JMH microbenchmark: BQVectorUtils.packAsBinary vs. packAsBinaryLegacy on the same inputs
+@OutputTimeUnit(TimeUnit.MILLISECONDS) // scores are reported as benchmark-method invocations per millisecond
+@State(Scope.Benchmark) // the fields below are the benchmark state (one instance shared by all benchmark threads)
+// first iteration is complete garbage, so make sure we really warmup
+@Warmup(iterations = 4, time = 1)
+// real iterations. not useful to spend tons of time here, better to fork more
+@Measurement(iterations = 5, time = 1)
+// engage some noise reduction
+@Fork(value = 1) // NOTE(review): only a single fork is configured although the comment above recommends forking more — confirm
+public class PackAsBinaryBenchmark {
+
+    static {
+        LogConfigurator.configureESLogging(); // native access requires logging to be initialized
+    }
+
+    @Param({ "384", "782", "1024" }) // vector dimension counts; NOTE(review): 782 is not a multiple of 64 (768 would be) — confirm it is intentional
+    int dims;
+
+    int length; // size in bytes of the packed output buffer, computed in setup()
+
+    int numVectors = 1000; // number of vectors processed by ONE benchmark-method invocation
+
+    int[][] qVectors; // numVectors input vectors with dims entries each; every entry is 0 or 1
+    byte[] packed; // output buffer, overwritten for every vector
+
+    @Setup // builds the inputs; with no explicit level this uses JMH's default setup level
+    public void setup() throws IOException { // NOTE(review): nothing visible in this body throws IOException
+        Random random = new Random(123); // fixed seed, so every run sees the same input vectors
+
+        this.length = BQVectorUtils.discretize(dims, 64) / 8; // presumably dims rounded up to a multiple of 64 bits, converted to bytes — TODO confirm discretize semantics
+        this.packed = new byte[length];
+
+        qVectors = new int[numVectors][dims];
+        for (int[] qVector : qVectors) {
+            for (int i = 0; i < dims; i++) {
+                qVector[i] = random.nextInt(2); // nextInt(2) yields 0 or 1: one random bit per dimension
+            }
+        }
+    }
+
+    @Benchmark // one invocation runs BQVectorUtils.packAsBinary over all numVectors inputs
+    public void packAsBinary(Blackhole bh) {
+        for (int i = 0; i < numVectors; i++) {
+            BQVectorUtils.packAsBinary(qVectors[i], packed);
+            bh.consume(packed); // hand the buffer to JMH so the packing work is not treated as dead code
+        }
+    }
+
+    @Benchmark // same loop as packAsBinary, but calling BQVectorUtils.packAsBinaryLegacy for comparison
+    public void packAsBinaryLegacy(Blackhole bh) {
+        for (int i = 0; i < numVectors; i++) {
+            BQVectorUtils.packAsBinaryLegacy(qVectors[i], packed);
+            bh.consume(packed); // hand the buffer to JMH so the packing work is not treated as dead code
+        }
+    }
+}
@@ -0,0 +1,86 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+package org.elasticsearch.benchmark.vector;
+
+import org.elasticsearch.common.logging.LogConfigurator;
+import org.elasticsearch.index.codec.vectors.BQSpaceUtils;
+import org.elasticsearch.index.codec.vectors.BQVectorUtils;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+import java.io.IOException;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+@BenchmarkMode(Mode.Throughput) // JMH microbenchmark: BQSpaceUtils.transposeHalfByte vs. transposeHalfByteLegacy on the same inputs
+@OutputTimeUnit(TimeUnit.MILLISECONDS) // scores are reported as benchmark-method invocations per millisecond
+@State(Scope.Benchmark) // the fields below are the benchmark state (one instance shared by all benchmark threads)
+// first iteration is complete garbage, so make sure we really warmup
+@Warmup(iterations = 4, time = 1)
+// real iterations. not useful to spend tons of time here, better to fork more
+@Measurement(iterations = 5, time = 1)
+// engage some noise reduction
+@Fork(value = 1) // NOTE(review): only a single fork is configured although the comment above recommends forking more — confirm
+public class TransposeHalfByteBenchmark {
+
+    static {
+        LogConfigurator.configureESLogging(); // native access requires logging to be initialized
+    }
+
+    @Param({ "384", "782", "1024" }) // vector dimension counts; NOTE(review): 782 is not a multiple of 64 (768 would be) — confirm it is intentional
+    int dims;
+
+    int length; // size in bytes of the transposed output buffer, computed in setup()
+
+    int numVectors = 1000; // number of vectors processed by ONE benchmark-method invocation
+
+    int[][] qVectors; // numVectors input vectors with dims entries each; every entry is in [0, 16)
+    byte[] packed; // output buffer, overwritten for every vector
+
+    @Setup // builds the inputs; with no explicit level this uses JMH's default setup level
+    public void setup() throws IOException { // NOTE(review): nothing visible in this body throws IOException
+        Random random = new Random(123); // fixed seed, so every run sees the same input vectors
+
+        this.length = 4 * BQVectorUtils.discretize(dims, 64) / 8; // four times the size used for 1-bit packing; presumably 4 bits per dimension — TODO confirm
+        this.packed = new byte[length];
+
+        qVectors = new int[numVectors][dims];
+        for (int[] qVector : qVectors) {
+            for (int i = 0; i < dims; i++) {
+                qVector[i] = random.nextInt(16); // nextInt(16) yields 0..15, i.e. a value that fits in half a byte
+            }
+        }
+    }
+
+    @Benchmark // one invocation runs BQSpaceUtils.transposeHalfByte over all numVectors inputs
+    public void transposeHalfByte(Blackhole bh) {
+        for (int i = 0; i < numVectors; i++) {
+            BQSpaceUtils.transposeHalfByte(qVectors[i], packed);
+            bh.consume(packed); // hand the buffer to JMH so the transpose work is not treated as dead code
+        }
+    }
+
+    @Benchmark // same loop as transposeHalfByte, but calling BQSpaceUtils.transposeHalfByteLegacy for comparison
+    public void transposeHalfByteLegacy(Blackhole bh) {
+        for (int i = 0; i < numVectors; i++) {
+            BQSpaceUtils.transposeHalfByteLegacy(qVectors[i], packed);
+            bh.consume(packed); // hand the buffer to JMH so the transpose work is not treated as dead code
+        }
+    }
+}
@@ -0,0 +1,10 @@
+pr: 131559
+summary: Add support for LOOKUP JOIN on multiple fields
+area: ES|QL
+type: enhancement
+issues: [ ]
+highlight:
+  title: Add support for Lookup Join on Multiple Fields
+  body: "Add support for LOOKUP JOIN on multiple fields, e.g. FROM index1\n| LOOKUP\
+    \ JOIN lookup_index ON field1, field2"
+  notable: true
@@ -0,0 +1,5 @@
+pr: 132011
+summary: Restrict Indexing To Child Streams When Streams Is Enabled
+area: Data streams
+type: enhancement
+issues: []
@@ -0,0 +1,5 @@
+pr: 132064
+summary: Only Allow Enabling Streams If No Conflicting Indices Exist
+area: Data streams
+type: enhancement
+issues: []
@@ -0,0 +1,5 @@
+pr: 132638
+summary: Better error message for sequences with only one clause plus UNTIL
+area: EQL
+type: bug
+issues: []
@@ -107,7 +107,6 @@ PUT my-index-000003
 ```
 
 ### Using ELSER on EIS
-
 ```{applies_to}
 stack: preview 9.1
 serverless: preview
@@ -223,6 +222,10 @@ generated from it. When querying, the individual passages will be automatically
 searched for each document, and the most relevant passage will be used to
 compute a score.
 
+Chunks are stored as start and end character offsets rather than as separate
+text strings. These offsets point to the exact location of each chunk within the
+original input text.
+
 For more details on chunking and how to configure chunking settings,
 see [Configuring chunking](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-inference)
 in the Inference API documentation.
@@ -238,7 +241,8 @@ stack: ga 9.1
 
 You can pre-chunk the input by sending it to Elasticsearch as an array of
 strings.
-Example:
+
+For example:
 
 ```console
 PUT test-index
@@ -540,7 +544,6 @@ POST test-index/_search
 This will return verbose chunked embeddings content that is used to perform
 semantic search for `semantic_text` fields.
 
-
 ## Limitations [limitations]
 
 `semantic_text` field types have the following limitations:
 
@@ -17,13 +17,22 @@ FROM <source_index>
 | LOOKUP JOIN <lookup_index> ON <field_name>
 ```
 
+```esql
+FROM <source_index>
+| LOOKUP JOIN <lookup_index> ON <field_name1>, <field_name2>, <field_name3>
+```
+
 **Parameters**
 
 `<lookup_index>`
 :   The name of the lookup index. This must be a specific index name - wildcards, aliases, and remote cluster references are not supported. Indices used for lookups must be configured with the [`lookup` index mode](/reference/elasticsearch/index-settings/index-modules.md#index-mode-setting).
 
-`<field_name>`
-:   The field to join on. This field must exist in both your current query results and in the lookup index. If the field contains multi-valued entries, those entries will not match anything (the added fields will contain `null` for those rows).
+`<field_name>` or `<field_name1>, <field_name2>, <field_name3>`
+:   The field(s) to join on. Can be either:
+  * A single field name
+  * A comma-separated list of field names {applies_to}`stack: ga 9.2`
+:   These fields must exist in both your current query results and in the lookup index. If the fields contain multi-valued entries, those entries will not match anything (the added fields will contain `null` for those rows).
+
 
 **Description**
 
@@ -32,7 +41,7 @@ results table by finding documents in a lookup index that share the same
 join field value as your result rows.
 
 For each row in your results table that matches a document in the lookup
-index based on the join field, all fields from the matching document are
+index based on the join fields, all fields from the matching document are
 added as new columns to that row.
 
 If multiple documents in the lookup index match a single row in your
 
@@ -33,11 +33,14 @@ For example, you can use `LOOKUP JOIN` to:
 The `LOOKUP JOIN` command adds fields from the lookup index as new columns to your results table based on matching values in the join field.
 
 The command requires two parameters:
-- The name of the lookup index (which must have the `lookup` [`index.mode setting`](/reference/elasticsearch/index-settings/index-modules.md#index-mode-setting))
-- The name of the field to join on
-
+* The name of the lookup index (which must have the `lookup` [`index.mode setting`](/reference/elasticsearch/index-settings/index-modules.md#index-mode-setting))
+* The field(s) to join on. Can be either:
+  * A single field name
+  * A comma-separated list of field names {applies_to}`stack: ga 9.2`
+  
 ```esql
-LOOKUP JOIN <lookup_index> ON <field_name>
+LOOKUP JOIN <lookup_index> ON <field_name>  // Join on a single field
+LOOKUP JOIN <lookup_index> ON <field_name1>, <field_name2>, <field_name3>  // Join on multiple fields
 ```
 
 :::{image} ../images/esql-lookup-join.png
@@ -200,7 +203,7 @@ The following are the current limitations with `LOOKUP JOIN`:
 * Indices in [`lookup` mode](/reference/elasticsearch/index-settings/index-modules.md#index-mode-setting) are always single-sharded.
 * Cross cluster search is unsupported initially. Both source and lookup indices must be local.
 * Currently, only matching on equality is supported.
-* `LOOKUP JOIN` can only use a single match field and a single index. Wildcards are not supported.
+* In Stack versions `9.0-9.1`, `LOOKUP JOIN` can only use a single match field and a single index. Wildcards are not supported.
   * Aliases, datemath, and datastreams are supported, as long as the index pattern matches a single concrete index {applies_to}`stack: ga 9.1.0`.
 * The name of the match field in `LOOKUP JOIN lu_idx ON match_field` must match an existing field in the query. This may require `RENAME`s or `EVAL`s to achieve.
 * The query will circuit break if there are too many matching documents in the lookup index, or if the documents are too large. More precisely, `LOOKUP JOIN` works in batches of, normally, about 10,000 rows; a large amount of heap space is needed if the matching documents from the lookup index for a batch are multiple megabytes or larger. This is roughly the same as for `ENRICH`.