Skip to content

Commit 58b31ed

Browse files
Merge branch 'main' into foldable_part2_v2
2 parents a6e1f91 + a995a12 commit 58b31ed

File tree

116 files changed

+3636
-766
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

116 files changed

+3636
-766
lines changed

.buildkite/pull-requests.json

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,19 @@
1515
"trigger_comment_regex": "(run\\W+elasticsearch-ci.+)|(^\\s*((buildkite|@elastic(search)?machine)\\s*)?test\\s+this(\\s+please)?)",
1616
"cancel_intermediate_builds": true,
1717
"cancel_intermediate_builds_on_comment": false
18+
},
19+
{
20+
"enabled": true,
21+
"pipeline_slug": "elasticsearch-performance-esbench-pr",
22+
"allow_org_users": true,
23+
"allowed_repo_permissions": [
24+
"admin",
25+
"write"
26+
],
27+
"set_commit_status": false,
28+
"build_on_commit": false,
29+
"build_on_comment": true,
30+
"trigger_comment_regex": "^(buildkite|@elastic(search)?machine) benchmark this with (?<benchmark>\\w+)( please)?$"
1831
}
1932
]
2033
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
package org.elasticsearch.benchmark.vector;
10+
11+
import org.elasticsearch.common.logging.LogConfigurator;
12+
import org.elasticsearch.index.codec.vectors.BQVectorUtils;
13+
import org.openjdk.jmh.annotations.Benchmark;
14+
import org.openjdk.jmh.annotations.BenchmarkMode;
15+
import org.openjdk.jmh.annotations.Fork;
16+
import org.openjdk.jmh.annotations.Measurement;
17+
import org.openjdk.jmh.annotations.Mode;
18+
import org.openjdk.jmh.annotations.OutputTimeUnit;
19+
import org.openjdk.jmh.annotations.Param;
20+
import org.openjdk.jmh.annotations.Scope;
21+
import org.openjdk.jmh.annotations.Setup;
22+
import org.openjdk.jmh.annotations.State;
23+
import org.openjdk.jmh.annotations.Warmup;
24+
import org.openjdk.jmh.infra.Blackhole;
25+
26+
import java.io.IOException;
27+
import java.util.Random;
28+
import java.util.concurrent.TimeUnit;
29+
30+
/**
 * JMH benchmark comparing {@code BQVectorUtils.packAsBinary} with
 * {@code BQVectorUtils.packAsBinaryLegacy} on the same pre-generated input.
 * <p>
 * Every benchmark invocation processes all {@code numVectors} (1000) vectors, so the
 * reported throughput (operations per millisecond) counts batches of 1000 calls, not
 * single calls.
 */
@BenchmarkMode(Mode.Throughput)
31+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
32+
@State(Scope.Benchmark)
33+
// first iteration is complete garbage, so make sure we really warmup
34+
@Warmup(iterations = 4, time = 1)
35+
// real iterations. not useful to spend tons of time here, better to fork more
36+
@Measurement(iterations = 5, time = 1)
37+
// engage some noise reduction
// NOTE(review): only a single fork is configured although the comment above suggests forking more - confirm intended
38+
@Fork(value = 1)
39+
public class PackAsBinaryBenchmark {
40+
41+
static {
42+
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
43+
}
44+
45+
// Vector dimensionality under test; JMH runs the benchmarks once per listed value.
// NOTE(review): 782 is not a multiple of 8 or 64 - presumably chosen to exercise a padded tail; confirm it is not a typo for 768
@Param({ "384", "782", "1024" })
46+
int dims;
47+
48+
// Size of the output buffer, computed from dims in setup().
int length;
49+
50+
// Number of input vectors processed by one benchmark invocation.
int numVectors = 1000;
51+
52+
// Input vectors: numVectors rows of dims values each, filled in setup().
int[][] qVectors;
53+
// Output buffer shared by (and overwritten on) every pack call.
byte[] packed;
54+
55+
/**
 * Allocates the output buffer and fills the input vectors with pseudo-random
 * values drawn from a fixed seed (123), so the data is identical on every run.
 */
// NOTE(review): nothing visible in this body throws IOException - unless discretize declares it, the throws clause looks unnecessary
@Setup
56+
public void setup() throws IOException {
57+
Random random = new Random(123);
58+
59+
// presumably discretize rounds dims up to a multiple of 64 and "/ 8" converts that bit count to bytes - confirm against BQVectorUtils
this.length = BQVectorUtils.discretize(dims, 64) / 8;
60+
this.packed = new byte[length];
61+
62+
qVectors = new int[numVectors][dims];
63+
for (int[] qVector : qVectors) {
64+
for (int i = 0; i < dims; i++) {
65+
// nextInt(2) yields 0 or 1, i.e. one binary value per dimension
qVector[i] = random.nextInt(2);
66+
}
67+
}
68+
}
69+
70+
/**
 * Runs {@code BQVectorUtils.packAsBinary} over every input vector, writing into the shared buffer.
 *
 * @param bh JMH blackhole that receives the output buffer after each call
 */
@Benchmark
71+
public void packAsBinary(Blackhole bh) {
72+
for (int i = 0; i < numVectors; i++) {
73+
BQVectorUtils.packAsBinary(qVectors[i], packed);
74+
// hand the result to the blackhole so the call cannot be optimized away as dead code
bh.consume(packed);
75+
}
76+
}
77+
78+
/**
 * Runs {@code BQVectorUtils.packAsBinaryLegacy} over the same input vectors, as the comparison baseline.
 *
 * @param bh JMH blackhole that receives the output buffer after each call
 */
@Benchmark
79+
public void packAsBinaryLegacy(Blackhole bh) {
80+
for (int i = 0; i < numVectors; i++) {
81+
BQVectorUtils.packAsBinaryLegacy(qVectors[i], packed);
82+
// hand the result to the blackhole so the call cannot be optimized away as dead code
bh.consume(packed);
83+
}
84+
}
85+
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
package org.elasticsearch.benchmark.vector;
10+
11+
import org.elasticsearch.common.logging.LogConfigurator;
12+
import org.elasticsearch.index.codec.vectors.BQSpaceUtils;
13+
import org.elasticsearch.index.codec.vectors.BQVectorUtils;
14+
import org.openjdk.jmh.annotations.Benchmark;
15+
import org.openjdk.jmh.annotations.BenchmarkMode;
16+
import org.openjdk.jmh.annotations.Fork;
17+
import org.openjdk.jmh.annotations.Measurement;
18+
import org.openjdk.jmh.annotations.Mode;
19+
import org.openjdk.jmh.annotations.OutputTimeUnit;
20+
import org.openjdk.jmh.annotations.Param;
21+
import org.openjdk.jmh.annotations.Scope;
22+
import org.openjdk.jmh.annotations.Setup;
23+
import org.openjdk.jmh.annotations.State;
24+
import org.openjdk.jmh.annotations.Warmup;
25+
import org.openjdk.jmh.infra.Blackhole;
26+
27+
import java.io.IOException;
28+
import java.util.Random;
29+
import java.util.concurrent.TimeUnit;
30+
31+
/**
 * JMH benchmark comparing {@code BQSpaceUtils.transposeHalfByte} with
 * {@code BQSpaceUtils.transposeHalfByteLegacy} on the same pre-generated input.
 * <p>
 * Every benchmark invocation processes all {@code numVectors} (1000) vectors, so the
 * reported throughput (operations per millisecond) counts batches of 1000 calls, not
 * single calls.
 */
@BenchmarkMode(Mode.Throughput)
32+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
33+
@State(Scope.Benchmark)
34+
// first iteration is complete garbage, so make sure we really warmup
35+
@Warmup(iterations = 4, time = 1)
36+
// real iterations. not useful to spend tons of time here, better to fork more
37+
@Measurement(iterations = 5, time = 1)
38+
// engage some noise reduction
// NOTE(review): only a single fork is configured although the comment above suggests forking more - confirm intended
39+
@Fork(value = 1)
40+
public class TransposeHalfByteBenchmark {
41+
42+
static {
43+
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
44+
}
45+
46+
// Vector dimensionality under test; JMH runs the benchmarks once per listed value.
// NOTE(review): 782 is not a multiple of 8 or 64 - presumably chosen to exercise a padded tail; confirm it is not a typo for 768
@Param({ "384", "782", "1024" })
47+
int dims;
48+
49+
// Size of the output buffer, computed from dims in setup().
int length;
50+
51+
// Number of input vectors processed by one benchmark invocation.
int numVectors = 1000;
52+
53+
// Input vectors: numVectors rows of dims values each, filled in setup().
int[][] qVectors;
54+
// Output buffer shared by (and overwritten on) every transpose call.
byte[] packed;
55+
56+
/**
 * Allocates the output buffer and fills the input vectors with pseudo-random
 * values drawn from a fixed seed (123), so the data is identical on every run.
 */
// NOTE(review): nothing visible in this body throws IOException - unless discretize declares it, the throws clause looks unnecessary
@Setup
57+
public void setup() throws IOException {
58+
Random random = new Random(123);
59+
60+
// four times the size used for one-bit packing - presumably one bit plane per bit of each 4-bit value; confirm against BQSpaceUtils
this.length = 4 * BQVectorUtils.discretize(dims, 64) / 8;
61+
this.packed = new byte[length];
62+
63+
qVectors = new int[numVectors][dims];
64+
for (int[] qVector : qVectors) {
65+
for (int i = 0; i < dims; i++) {
66+
// nextInt(16) yields 0..15, i.e. one half-byte (4-bit) value per dimension
qVector[i] = random.nextInt(16);
67+
}
68+
}
69+
}
70+
71+
/**
 * Runs {@code BQSpaceUtils.transposeHalfByte} over every input vector, writing into the shared buffer.
 *
 * @param bh JMH blackhole that receives the output buffer after each call
 */
@Benchmark
72+
public void transposeHalfByte(Blackhole bh) {
73+
for (int i = 0; i < numVectors; i++) {
74+
BQSpaceUtils.transposeHalfByte(qVectors[i], packed);
75+
// hand the result to the blackhole so the call cannot be optimized away as dead code
bh.consume(packed);
76+
}
77+
}
78+
79+
/**
 * Runs {@code BQSpaceUtils.transposeHalfByteLegacy} over the same input vectors, as the comparison baseline.
 *
 * @param bh JMH blackhole that receives the output buffer after each call
 */
@Benchmark
80+
public void transposeHalfByteLegacy(Blackhole bh) {
81+
for (int i = 0; i < numVectors; i++) {
82+
BQSpaceUtils.transposeHalfByteLegacy(qVectors[i], packed);
83+
// hand the result to the blackhole so the call cannot be optimized away as dead code
bh.consume(packed);
84+
}
85+
}
86+
}

docs/changelog/131559.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
pr: 131559
2+
summary: Add support for LOOKUP JOIN on multiple fields
3+
area: ES|QL
4+
type: enhancement
5+
issues: [ ]
6+
highlight:
7+
title: Add support for Lookup Join on Multiple Fields
8+
body: "Add support for Lookup Join on Multiple Fields e.g. FROM index1\n| LOOKUP\
9+
\ JOIN lookup_index on field1, field2"
10+
notable: true

docs/changelog/132011.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132011
2+
summary: Restrict Indexing To Child Streams When Streams Is Enabled
3+
area: Data streams
4+
type: enhancement
5+
issues: []

docs/changelog/132064.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132064
2+
summary: Only Allow Enabling Streams If No Conflicting Indices Exist
3+
area: Data streams
4+
type: enhancement
5+
issues: []

docs/changelog/132638.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132638
2+
summary: Better error message for sequences with only one clause plus UNTIL
3+
area: EQL
4+
type: bug
5+
issues: []

docs/reference/elasticsearch/mapping-reference/semantic-text.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,6 @@ PUT my-index-000003
107107
```
108108

109109
### Using ELSER on EIS
110-
111110
```{applies_to}
112111
stack: preview 9.1
113112
serverless: preview
@@ -223,6 +222,10 @@ generated from it. When querying, the individual passages will be automatically
223222
searched for each document, and the most relevant passage will be used to
224223
compute a score.
225224

225+
Chunks are stored as start and end character offsets rather than as separate
226+
text strings. These offsets point to the exact location of each chunk within the
227+
original input text.
228+
226229
For more details on chunking and how to configure chunking settings,
227230
see [Configuring chunking](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-inference)
228231
in the Inference API documentation.
@@ -238,7 +241,8 @@ stack: ga 9.1
238241

239242
You can pre-chunk the input by sending it to Elasticsearch as an array of
240243
strings.
241-
Example:
244+
245+
For example:
242246

243247
```console
244248
PUT test-index
@@ -540,7 +544,6 @@ POST test-index/_search
540544
This will return verbose chunked embeddings content that is used to perform
541545
semantic search for `semantic_text` fields.
542546

543-
544547
## Limitations [limitations]
545548

546549
`semantic_text` field types have the following limitations:

docs/reference/query-languages/esql/_snippets/commands/layout/lookup-join.md

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,22 @@ FROM <source_index>
1717
| LOOKUP JOIN <lookup_index> ON <field_name>
1818
```
1919

20+
```esql
21+
FROM <source_index>
22+
| LOOKUP JOIN <lookup_index> ON <field_name1>, <field_name2>, <field_name3>
23+
```
24+
2025
**Parameters**
2126

2227
`<lookup_index>`
2328
: The name of the lookup index. This must be a specific index name - wildcards, aliases, and remote cluster references are not supported. Indices used for lookups must be configured with the [`lookup` index mode](/reference/elasticsearch/index-settings/index-modules.md#index-mode-setting).
2429

25-
`<field_name>`
26-
: The field to join on. This field must exist in both your current query results and in the lookup index. If the field contains multi-valued entries, those entries will not match anything (the added fields will contain `null` for those rows).
30+
`<field_name>` or `<field_name1>, <field_name2>, <field_name3>`
31+
: The field(s) to join on. Can be either:
32+
* A single field name
33+
* A comma-separated list of field names {applies_to}`stack: ga 9.2`
34+
: These fields must exist in both your current query results and in the lookup index. If the fields contain multi-valued entries, those entries will not match anything (the added fields will contain `null` for those rows).
35+
2736

2837
**Description**
2938

@@ -32,7 +41,7 @@ results table by finding documents in a lookup index that share the same
3241
join field value as your result rows.
3342

3443
For each row in your results table that matches a document in the lookup
35-
index based on the join field, all fields from the matching document are
44+
index based on the join fields, all fields from the matching document are
3645
added as new columns to that row.
3746

3847
If multiple documents in the lookup index match a single row in your

docs/reference/query-languages/esql/esql-lookup-join.md

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,14 @@ For example, you can use `LOOKUP JOIN` to:
3333
The `LOOKUP JOIN` command adds fields from the lookup index as new columns to your results table based on matching values in the join field.
3434

3535
The command requires two parameters:
36-
- The name of the lookup index (which must have the `lookup` [`index.mode setting`](/reference/elasticsearch/index-settings/index-modules.md#index-mode-setting))
37-
- The name of the field to join on
38-
36+
* The name of the lookup index (which must have the `lookup` [`index.mode setting`](/reference/elasticsearch/index-settings/index-modules.md#index-mode-setting))
37+
* The field(s) to join on. Can be either:
38+
* A single field name
39+
* A comma-separated list of field names {applies_to}`stack: ga 9.2`
40+
3941
```esql
40-
LOOKUP JOIN <lookup_index> ON <field_name>
42+
LOOKUP JOIN <lookup_index> ON <field_name>  // Join on a single field
43+
LOOKUP JOIN <lookup_index> ON <field_name1>, <field_name2>, <field_name3>  // Join on multiple fields
4144
```
4245

4346
:::{image} ../images/esql-lookup-join.png
@@ -200,7 +203,7 @@ The following are the current limitations with `LOOKUP JOIN`:
200203
* Indices in [`lookup` mode](/reference/elasticsearch/index-settings/index-modules.md#index-mode-setting) are always single-sharded.
201204
* Cross cluster search is unsupported initially. Both source and lookup indices must be local.
202205
* Currently, only matching on equality is supported.
203-
* `LOOKUP JOIN` can only use a single match field and a single index. Wildcards are not supported.
206+
* In Stack versions `9.0-9.1`, `LOOKUP JOIN` can only use a single match field and a single index. Wildcards are not supported.
204207
* Aliases, datemath, and datastreams are supported, as long as the index pattern matches a single concrete index {applies_to}`stack: ga 9.1.0`.
205208
* The name of the match field in `LOOKUP JOIN lu_idx ON match_field` must match an existing field in the query. This may require `RENAME`s or `EVAL`s to achieve.
206209
* The query will circuit break if there are too many matching documents in the lookup index, or if the documents are too large. More precisely, `LOOKUP JOIN` works in batches of, normally, about 10,000 rows; a large amount of heap space is needed if the matching documents from the lookup index for a batch are multiple megabytes or larger. This is roughly the same as for `ENRICH`.

0 commit comments

Comments
 (0)