Skip to content

Commit a33a5a6

Browse files
committed
Merge remote-tracking branch 'upstream/main' into lucene_snapshot_10_3
2 parents d5964cf + 4e61088 commit a33a5a6

File tree

212 files changed

+6505
-1652
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

212 files changed

+6505
-1652
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.benchmark.vector;
11+
12+
import org.elasticsearch.common.logging.LogConfigurator;
13+
import org.elasticsearch.index.codec.vectors.cluster.NeighborHood;
14+
import org.openjdk.jmh.annotations.Benchmark;
15+
import org.openjdk.jmh.annotations.BenchmarkMode;
16+
import org.openjdk.jmh.annotations.Fork;
17+
import org.openjdk.jmh.annotations.Measurement;
18+
import org.openjdk.jmh.annotations.Mode;
19+
import org.openjdk.jmh.annotations.OutputTimeUnit;
20+
import org.openjdk.jmh.annotations.Param;
21+
import org.openjdk.jmh.annotations.Scope;
22+
import org.openjdk.jmh.annotations.Setup;
23+
import org.openjdk.jmh.annotations.State;
24+
import org.openjdk.jmh.annotations.Warmup;
25+
import org.openjdk.jmh.infra.Blackhole;
26+
27+
import java.io.IOException;
28+
import java.util.Random;
29+
import java.util.concurrent.TimeUnit;
30+
31+
@BenchmarkMode(Mode.AverageTime)
32+
@OutputTimeUnit(TimeUnit.SECONDS)
33+
@State(Scope.Benchmark)
34+
// first iteration is complete garbage, so make sure we really warmup
35+
@Warmup(iterations = 1, time = 1)
36+
// real iterations. not useful to spend tons of time here, better to fork more
37+
@Measurement(iterations = 3, time = 1)
38+
// engage some noise reduction
39+
@Fork(value = 1)
40+
public class ComputeNeighboursBenchmark {
41+
42+
static {
43+
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
44+
}
45+
46+
@Param({ "1000", "2000", "3000", "5000", "10000", "20000", "50000" })
47+
int numVectors;
48+
49+
@Param({ "384", "782", "1024" })
50+
int dims;
51+
52+
float[][] vectors;
53+
int clusterPerNeighbour = 128;
54+
55+
@Setup
56+
public void setup() throws IOException {
57+
Random random = new Random(123);
58+
vectors = new float[numVectors][dims];
59+
for (float[] vector : vectors) {
60+
for (int i = 0; i < dims; i++) {
61+
vector[i] = random.nextFloat();
62+
}
63+
}
64+
}
65+
66+
@Benchmark
67+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
68+
public void bruteForce(Blackhole bh) {
69+
bh.consume(NeighborHood.computeNeighborhoodsBruteForce(vectors, clusterPerNeighbour));
70+
}
71+
72+
@Benchmark
73+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
74+
public void graph(Blackhole bh) throws IOException {
75+
bh.consume(NeighborHood.computeNeighborhoodsGraph(vectors, clusterPerNeighbour));
76+
}
77+
}

build-tools-internal/version.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ log4j = 2.19.0
1414
slf4j = 2.0.6
1515
ecsLogging = 1.2.0
1616
jna = 5.12.1
17-
netty = 4.1.118.Final
17+
netty = 4.1.126.Final
1818
commons_lang3 = 3.9
1919
google_oauth_client = 1.34.1
2020
awsv2sdk = 2.31.78

docs/changelog/132003.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132003
2+
summary: Add `copy_from` option to the Append processor
3+
area: Ingest Node
4+
type: enhancement
5+
issues: []

docs/changelog/133546.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 133546
2+
summary: "Support geohash, geotile and geohex grid types in ST_INTERSECTS and ST_DISJOINT"
3+
area: "ES|QL"
4+
type: enhancement
5+
issues: []

docs/changelog/133599.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 133599
2+
summary: Support Gemini thinking budget in inference API
3+
area: Machine Learning
4+
type: enhancement
5+
issues: []

docs/changelog/133954.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 133954
2+
summary: "ILM: Force merge on zero-replica cloned index before snapshotting for searchable snapshots"
3+
area: ILM+SLM
4+
type: enhancement
5+
issues:
6+
- 75478

docs/changelog/134182.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 134182
2+
summary: Upgrade Netty to 4.1.126.Final
3+
area: Network
4+
type: upgrade
5+
issues: []

docs/changelog/134198.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 134198
2+
summary: Improve `ShardLockObtainFailedException` message
3+
area: Store
4+
type: enhancement
5+
issues: []

docs/reference/elasticsearch/mapping-reference/doc-values.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,13 @@ PUT my-index-000001
7373
1. The `status_code` field has `doc_values` enabled by default.
7474
2. The `session_id` has `doc_values` disabled, but can still be queried.
7575

76+
## Multi-valued doc values note
77+
78+
Elasticsearch supports storing multi-valued fields at index time. Multi-valued fields can be provided as a json array. However in the doc values format, the values aren't stored in the order as was provided at index time. Additionally, duplicates may be lost.
79+
This implementation detail of doc values is visible when features directly interact with doc values, which may be the case for example in ES|QL or aggregations in the search API. Note, that _source always returns arrays in the way that was provided at index time.
80+
81+
How the ordering differs depends on whether the array is mapped as keyword or a numeric field type. In case of the `keyword` field type, the multi-valued values for each document are ordered lexicographically and duplicates are lost. If retaining duplicates is important then the `counted_keyword` field type should be used.
82+
In case of numeric field types (e.g. `long`, `double`, `scaled_float`, etc.), the multi-valued values for each document are ordered in natural order and duplicates are retained.
7683

7784

7885

docs/reference/enrich-processor/append-processor.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,10 @@ $$$append-options$$$
1414
| Name | Required | Default | Description |
1515
| --- | --- | --- | --- |
1616
| `field` | yes | - | The field to be appended to. Supports [template snippets](docs-content://manage-data/ingest/transform-enrich/ingest-pipelines.md#template-snippets). |
17-
| `value` | yes | - | The value to be appended. Supports [template snippets](docs-content://manage-data/ingest/transform-enrich/ingest-pipelines.md#template-snippets). |
17+
| `value` | yes* | - | The value to be appended. Supports [template snippets](docs-content://manage-data/ingest/transform-enrich/ingest-pipelines.md#template-snippets). May specify only one of `value` or `copy_from`. |
18+
| `copy_from` {applies_to}`stack: ga 9.2.0` | no | - | The origin field which will be appended to `field`, cannot set `value` simultaneously. |
1819
| `allow_duplicates` | no | true | If `false`, the processor does not appendvalues already present in the field. |
19-
| `media_type` | no | `application/json` | The media type for encoding `value`. Applies only when `value` is a[template snippet](docs-content://manage-data/ingest/transform-enrich/ingest-pipelines.md#template-snippets). Must be one of `application/json`, `text/plain`, or`application/x-www-form-urlencoded`. |
20+
| `media_type` | no | `application/json` | The media type for encoding `value`. Applies only when `value` is a [template snippet](docs-content://manage-data/ingest/transform-enrich/ingest-pipelines.md#template-snippets). Must be one of `application/json`, `text/plain`, or`application/x-www-form-urlencoded`. |
2021
| `description` | no | - | Description of the processor. Useful for describing the purpose of the processor or its configuration. |
2122
| `if` | no | - | Conditionally execute the processor. See [Conditionally run a processor](docs-content://manage-data/ingest/transform-enrich/ingest-pipelines.md#conditionally-run-processor). |
2223
| `ignore_failure` | no | `false` | Ignore failures for the processor. See [Handling pipeline failures](docs-content://manage-data/ingest/transform-enrich/ingest-pipelines.md#handling-pipeline-failures). |

0 commit comments

Comments
 (0)