Skip to content

Commit 8a071f8

Browse files
committed
Merge branch '8.19' of https://github.com/elastic/elasticsearch into esql-inference-feature-manual-backport
2 parents a829849 + bc7960d commit 8a071f8

File tree

91 files changed

+2884
-1070
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

91 files changed

+2884
-1070
lines changed

benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesAggregatorBenchmark.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,13 @@
2121
import org.elasticsearch.compute.data.Block;
2222
import org.elasticsearch.compute.data.BlockFactory;
2323
import org.elasticsearch.compute.data.BytesRefBlock;
24+
import org.elasticsearch.compute.data.BytesRefVector;
2425
import org.elasticsearch.compute.data.ElementType;
2526
import org.elasticsearch.compute.data.IntBlock;
27+
import org.elasticsearch.compute.data.IntVector;
2628
import org.elasticsearch.compute.data.LongBlock;
2729
import org.elasticsearch.compute.data.LongVector;
30+
import org.elasticsearch.compute.data.OrdinalBytesRefVector;
2831
import org.elasticsearch.compute.data.Page;
2932
import org.elasticsearch.compute.operator.AggregationOperator;
3033
import org.elasticsearch.compute.operator.DriverContext;
@@ -275,11 +278,18 @@ private static Block dataBlock(int groups, String dataType) {
275278
int blockLength = blockLength(groups);
276279
return switch (dataType) {
277280
case BYTES_REF -> {
278-
try (BytesRefBlock.Builder builder = blockFactory.newBytesRefBlockBuilder(blockLength)) {
281+
try (
282+
BytesRefVector.Builder dict = blockFactory.newBytesRefVectorBuilder(blockLength);
283+
IntVector.Builder ords = blockFactory.newIntVectorBuilder(blockLength)
284+
) {
285+
final int dictLength = Math.min(blockLength, KEYWORDS.length);
286+
for (int i = 0; i < dictLength; i++) {
287+
dict.appendBytesRef(KEYWORDS[i]);
288+
}
279289
for (int i = 0; i < blockLength; i++) {
280-
builder.appendBytesRef(KEYWORDS[i % KEYWORDS.length]);
290+
ords.appendInt(i % dictLength);
281291
}
282-
yield builder.build();
292+
yield new OrdinalBytesRefVector(ords.build(), dict.build()).asBlock();
283293
}
284294
}
285295
case INT -> {
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.benchmark.xcontent;
11+
12+
import org.elasticsearch.benchmark.index.mapper.MapperServiceFactory;
13+
import org.elasticsearch.common.UUIDs;
14+
import org.elasticsearch.common.bytes.BytesReference;
15+
import org.elasticsearch.common.logging.LogConfigurator;
16+
import org.elasticsearch.index.mapper.MapperService;
17+
import org.elasticsearch.index.mapper.SourceToParse;
18+
import org.elasticsearch.xcontent.XContentBuilder;
19+
import org.elasticsearch.xcontent.XContentFactory;
20+
import org.elasticsearch.xcontent.XContentType;
21+
import org.openjdk.jmh.annotations.Benchmark;
22+
import org.openjdk.jmh.annotations.BenchmarkMode;
23+
import org.openjdk.jmh.annotations.Fork;
24+
import org.openjdk.jmh.annotations.Level;
25+
import org.openjdk.jmh.annotations.Measurement;
26+
import org.openjdk.jmh.annotations.Mode;
27+
import org.openjdk.jmh.annotations.OutputTimeUnit;
28+
import org.openjdk.jmh.annotations.Param;
29+
import org.openjdk.jmh.annotations.Scope;
30+
import org.openjdk.jmh.annotations.Setup;
31+
import org.openjdk.jmh.annotations.State;
32+
import org.openjdk.jmh.annotations.Threads;
33+
import org.openjdk.jmh.annotations.Warmup;
34+
import org.openjdk.jmh.infra.Blackhole;
35+
36+
import java.io.IOException;
37+
import java.util.Random;
38+
import java.util.concurrent.TimeUnit;
39+
40+
/**
41+
* Benchmark to measure indexing performance of keyword fields. Used to measure performance impact of skipping
42+
* UTF-8 to UTF-16 conversion during document parsing.
43+
*/
44+
@BenchmarkMode(Mode.AverageTime)
45+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
46+
@State(Scope.Benchmark)
47+
@Fork(1)
48+
@Threads(1)
49+
@Warmup(iterations = 1)
50+
@Measurement(iterations = 5)
51+
public class OptimizedTextBenchmark {
52+
static {
53+
// For Elasticsearch900Lucene101Codec:
54+
LogConfigurator.loadLog4jPlugins();
55+
LogConfigurator.configureESLogging();
56+
LogConfigurator.setNodeName("test");
57+
}
58+
59+
/**
60+
* Total number of documents to index.
61+
*/
62+
@Param("1048576")
63+
private int nDocs;
64+
65+
private MapperService mapperService;
66+
private SourceToParse[] sources;
67+
68+
private String randomValue(int length) {
69+
final String CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
70+
Random random = new Random();
71+
StringBuilder builder = new StringBuilder(length);
72+
for (int i = 0; i < length; i++) {
73+
builder.append(CHARS.charAt(random.nextInt(CHARS.length())));
74+
}
75+
return builder.toString();
76+
}
77+
78+
@Setup(Level.Trial)
79+
public void setup() throws IOException {
80+
mapperService = MapperServiceFactory.create("""
81+
{
82+
"_doc": {
83+
"dynamic": false,
84+
"properties": {
85+
"field": {
86+
"type": "keyword"
87+
}
88+
}
89+
}
90+
}
91+
""");
92+
93+
sources = new SourceToParse[nDocs];
94+
for (int i = 0; i < nDocs; i++) {
95+
XContentBuilder b = XContentFactory.jsonBuilder();
96+
b.startObject().field("field", randomValue(8)).endObject();
97+
sources[i] = new SourceToParse(UUIDs.randomBase64UUID(), BytesReference.bytes(b), XContentType.JSON);
98+
}
99+
}
100+
101+
@Benchmark
102+
public void indexDocuments(final Blackhole bh) {
103+
final var mapper = mapperService.documentMapper();
104+
for (int i = 0; i < nDocs; i++) {
105+
bh.consume(mapper.parse(sources[i]));
106+
}
107+
}
108+
}

distribution/docker/src/docker/dockerfiles/default/Dockerfile

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,18 +29,15 @@ RUN apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y curl
2929
# The tini GitHub page gives instructions for verifying the binary using
3030
# gpg, but the keyservers are slow to return the key and this can fail the
3131
# build. Instead, we check the binary against the published checksum.
32-
RUN set -eux ; \\
33-
tini_bin="" ; \\
32+
RUN set -eux; \\
3433
case "\$(arch)" in \\
35-
aarch64) tini_bin='tini-arm64' ;; \\
36-
x86_64) tini_bin='tini-amd64' ;; \\
37-
*) echo >&2 ; echo >&2 "Unsupported architecture \$(arch)" ; echo >&2 ; exit 1 ;; \\
34+
aarch64) tini_bin='tini-arm64'; tini_sum='07952557df20bfd2a95f9bef198b445e006171969499a1d361bd9e6f8e5e0e81' ;; \\
35+
x86_64) tini_bin='tini-amd64'; tini_sum='93dcc18adc78c65a028a84799ecf8ad40c936fdfc5f2a57b1acda5a8117fa82c' ;; \\
36+
*) echo >&2 "Unsupported architecture \$(arch)"; exit 1 ;; \\
3837
esac ; \\
39-
curl --retry 10 -S -L -O https://github.com/krallin/tini/releases/download/v0.19.0/\${tini_bin} ; \\
40-
curl --retry 10 -S -L -O https://github.com/krallin/tini/releases/download/v0.19.0/\${tini_bin}.sha256sum ; \\
41-
sha256sum -c \${tini_bin}.sha256sum ; \\
42-
rm \${tini_bin}.sha256sum ; \\
43-
mv \${tini_bin} /bin/tini ; \\
38+
curl -f --retry 10 -S -L -o /tmp/tini https://github.com/krallin/tini/releases/download/v0.19.0/\${tini_bin}; \\
39+
echo "\${tini_sum} /tmp/tini" | sha256sum -c -; \\
40+
mv /tmp/tini /bin/tini; \\
4441
chmod 0555 /bin/tini
4542
4643
RUN mkdir /usr/share/elasticsearch

docs/changelog/127797.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 127797
2+
summary: "Date nanos implicit casting in union types option #2"
3+
area: ES|QL
4+
type: enhancement
5+
issues:
6+
- 110009

docs/changelog/127849.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 127849
2+
summary: Optimize ordinal inputs in Values aggregation
3+
area: "ES|QL"
4+
type: enhancement
5+
issues: []

docs/changelog/128890.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 128890
2+
summary: Improve cache invalidation in IdP SP cache
3+
area: IdentityProvider
4+
type: bug
5+
issues: []

docs/reference/esql/functions/description/match.asciidoc

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/esql/functions/kibana/definition/match.json

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/esql/functions/kibana/docs/match.md

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/mapping/types/semantic-text.asciidoc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ If you don’t specify an inference endpoint, the `inference_id` field defaults
1818

1919
Using `semantic_text`, you won't need to specify how to generate embeddings for your data, or how to index it.
2020
The {infer} endpoint automatically determines the embedding generation, indexing, and query to use.
21+
Newly created indices with `semantic_text` fields using dense embeddings will be <<dense-vector-quantization,quantized>> to `bbq_hnsw` automatically.
2122

2223
If you use the preconfigured `.elser-2-elasticsearch` endpoint, you can set up `semantic_text` with the following API request:
2324

@@ -225,7 +226,8 @@ In these cases - when you use `sparse_vector` or `dense_vector` field types inst
225226
For indices containing `semantic_text` fields, updates that use scripts have the following behavior:
226227

227228
* Are supported through the https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-update[Update API].
228-
* Are not supported through the https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-bulk-1[Bulk API] and will fail. Even if the script targets non-`semantic_text` fields, the update will fail when the index contains a `semantic_text` field.
229+
* Are not supported through the https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-bulk-1[Bulk API] and will fail.
230+
Even if the script targets non-`semantic_text` fields, the update will fail when the index contains a `semantic_text` field.
229231

230232
[discrete]
231233
[[copy-to-support]]

0 commit comments

Comments
 (0)