Skip to content

Commit f3c0da5

Browse files
authored
Merge branch 'main' into default_elser_on_eis_semantic
2 parents 0edfb91 + 946bb09 commit f3c0da5

File tree

2,014 files changed

+35924
-13298
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

2,014 files changed

+35924
-13298
lines changed

.buildkite/scripts/dra-workflow.trigger.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,4 +46,18 @@ for BRANCH in "${BRANCHES[@]}"; do
4646
DRA_WORKFLOW: staging
4747
VERSION_QUALIFIER: ${VERSION_QUALIFIER:-}
4848
EOF
49+
50+
if [ "$BRANCH" = "7.17" ]; then
51+
cat <<EOF
52+
- trigger: elasticsearch-dra-workflow
53+
label: Trigger DRA snapshot workflow for $BRANCH
54+
async: true
55+
build:
56+
branch: "$BRANCH"
57+
commit: "$LAST_GOOD_COMMIT"
58+
env:
59+
DRA_WORKFLOW: snapshot
60+
VERSION_QUALIFIER: ${VERSION_QUALIFIER:-}
61+
EOF
62+
fi
4963
done

.buildkite/scripts/generate-pr-performance-benchmark.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ steps:
4646
CONFIGURATION_NAME: ${GITHUB_PR_COMMENT_VAR_BENCHMARK}
4747
ENV_ID: ${env_id_baseline}
4848
REVISION: ${merge_base}
49+
BENCHMARK_TYPE: baseline
4950
- label: Trigger contender benchmark with ${GITHUB_PR_TRIGGERED_SHA:0:7}
5051
trigger: elasticsearch-performance-esbench-pr
5152
build:
@@ -56,6 +57,7 @@ steps:
5657
ENV_ID: ${env_id_contender}
5758
ES_REPO_URL: https://github.com/${GITHUB_PR_OWNER}/${GITHUB_PR_REPO}.git
5859
REVISION: ${GITHUB_PR_TRIGGERED_SHA}
60+
BENCHMARK_TYPE: contender
5961
- wait: ~
6062
- label: Update PR comment and Buildkite annotation
6163
command: |

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ build/
4646
**/.local*
4747
.vagrant/
4848
/logs/
49+
**/target/
4950

5051
# osx stuff
5152
.DS_Store

.idea/inspectionProfiles/Project_Default.xml

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ public void setup() {
119119
}
120120

121121
private LogicalPlan plan(EsqlParser parser, Analyzer analyzer, LogicalPlanOptimizer optimizer, String query) {
122-
var parsed = parser.createStatement(query, new QueryParams(), telemetry, config);
122+
var parsed = parser.createStatement(query, new QueryParams(), telemetry);
123123
var analyzed = analyzer.analyze(parsed);
124124
var optimized = optimizer.optimize(analyzed);
125125
return optimized;

benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,9 @@
4141
import org.elasticsearch.compute.data.LongBlock;
4242
import org.elasticsearch.compute.data.LongVector;
4343
import org.elasticsearch.compute.data.Page;
44+
import org.elasticsearch.compute.lucene.AlwaysReferencedIndexedByShardId;
45+
import org.elasticsearch.compute.lucene.IndexedByShardIdFromSingleton;
4446
import org.elasticsearch.compute.lucene.LuceneSourceOperator;
45-
import org.elasticsearch.compute.lucene.ShardRefCounted;
4647
import org.elasticsearch.compute.lucene.read.ValuesSourceReaderOperator;
4748
import org.elasticsearch.compute.lucene.read.ValuesSourceReaderOperatorStatus;
4849
import org.elasticsearch.compute.operator.topn.TopNOperator;
@@ -368,7 +369,7 @@ public void benchmark() {
368369
blockFactory,
369370
ByteSizeValue.ofMb(1).getBytes(),
370371
fields(name),
371-
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> {
372+
new IndexedByShardIdFromSingleton<>(new ValuesSourceReaderOperator.ShardContext(reader, () -> {
372373
throw new UnsupportedOperationException("can't load _source here");
373374
}, EsqlPlugin.STORED_FIELDS_SEQUENTIAL_PROPORTION.getDefault(Settings.EMPTY))),
374375
0
@@ -538,7 +539,7 @@ private void setupPages() {
538539
pages.add(
539540
new Page(
540541
new DocVector(
541-
ShardRefCounted.ALWAYS_REFERENCED,
542+
AlwaysReferencedIndexedByShardId.INSTANCE,
542543
blockFactory.newConstantIntBlockWith(0, end - begin).asVector(),
543544
blockFactory.newConstantIntBlockWith(ctx.ord, end - begin).asVector(),
544545
docs.build(),
@@ -575,8 +576,7 @@ record ItrAndOrd(PrimitiveIterator.OfInt itr, int ord) {}
575576
pages.add(
576577
new Page(
577578
new DocVector(
578-
579-
ShardRefCounted.ALWAYS_REFERENCED,
579+
AlwaysReferencedIndexedByShardId.INSTANCE,
580580
blockFactory.newConstantIntVector(0, size),
581581
leafs.build(),
582582
docs.build(),
@@ -594,7 +594,7 @@ record ItrAndOrd(PrimitiveIterator.OfInt itr, int ord) {}
594594
pages.add(
595595
new Page(
596596
new DocVector(
597-
ShardRefCounted.ALWAYS_REFERENCED,
597+
AlwaysReferencedIndexedByShardId.INSTANCE,
598598
blockFactory.newConstantIntBlockWith(0, size).asVector(),
599599
leafs.build().asBlock().asVector(),
600600
docs.build(),
@@ -621,8 +621,7 @@ record ItrAndOrd(PrimitiveIterator.OfInt itr, int ord) {}
621621
pages.add(
622622
new Page(
623623
new DocVector(
624-
625-
ShardRefCounted.ALWAYS_REFERENCED,
624+
AlwaysReferencedIndexedByShardId.INSTANCE,
626625
blockFactory.newConstantIntVector(0, 1),
627626
blockFactory.newConstantIntVector(next.ord, 1),
628627
blockFactory.newConstantIntVector(next.itr.nextInt(), 1),

benchmarks/src/main/java/org/elasticsearch/benchmark/bytes/RecyclerBytesStreamOutputBenchmark.java

Lines changed: 5 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
package org.elasticsearch.benchmark.bytes;
1111

1212
import org.apache.lucene.util.BytesRef;
13+
import org.elasticsearch.benchmark.common.util.UTF8StringBytesBenchmark;
1314
import org.elasticsearch.common.io.stream.RecyclerBytesStreamOutput;
1415
import org.elasticsearch.common.recycler.Recycler;
1516
import org.openjdk.jmh.annotations.Benchmark;
@@ -65,10 +66,10 @@ public void initResults() throws IOException {
6566
// We use weights to generate certain sized UTF-8 characters and vInts. However, there is still some non-determinism which could
6667
// impact direct comparisons run-to-run
6768

68-
shortString = generateAsciiString(20);
69-
longString = generateAsciiString(100);
70-
nonAsciiString = generateUtf8String(200);
71-
veryLongString = generateAsciiString(800);
69+
shortString = UTF8StringBytesBenchmark.generateAsciiString(20);
70+
longString = UTF8StringBytesBenchmark.generateAsciiString(100);
71+
nonAsciiString = UTF8StringBytesBenchmark.generateUTF8String(200);
72+
veryLongString = UTF8StringBytesBenchmark.generateAsciiString(800);
7273
// vint values for benchmarking
7374
vints = new int[1000];
7475
for (int i = 0; i < vints.length; i++) {
@@ -143,49 +144,6 @@ public void writeVInt() throws IOException {
143144
}
144145
}
145146

146-
public static String generateAsciiString(int n) {
147-
ThreadLocalRandom random = ThreadLocalRandom.current();
148-
StringBuilder sb = new StringBuilder(n);
149-
150-
for (int i = 0; i < n; i++) {
151-
int ascii = random.nextInt(128);
152-
sb.append((char) ascii);
153-
}
154-
155-
return sb.toString();
156-
}
157-
158-
public static String generateUtf8String(int n) {
159-
ThreadLocalRandom random = ThreadLocalRandom.current();
160-
StringBuilder sb = new StringBuilder(n);
161-
162-
for (int i = 0; i < n; i++) {
163-
int codePoint;
164-
int probability = random.nextInt(100);
165-
166-
if (probability < 85) {
167-
// 1-byte UTF-8 (ASCII range)
168-
// 0x0000 to 0x007F
169-
codePoint = random.nextInt(0x0080);
170-
} else if (probability < 95) {
171-
// 2-byte UTF-8
172-
// 0x0080 to 0x07FF
173-
codePoint = random.nextInt(0x0080, 0x0800);
174-
} else {
175-
// 3-byte UTF-8
176-
// 0x0800 to 0xFFFF
177-
do {
178-
codePoint = random.nextInt(0x0800, 0x10000);
179-
// Skip surrogate pairs (0xD800-0xDFFF)
180-
} while (codePoint >= 0xD800 && codePoint <= 0xDFFF);
181-
}
182-
183-
sb.appendCodePoint(codePoint);
184-
}
185-
186-
return sb.toString();
187-
}
188-
189147
private record BenchmarkRecycler(AtomicReference<BytesRef> bytesRef) implements Recycler<BytesRef> {
190148

191149
@Override
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.benchmark.common.util;
11+
12+
import org.apache.lucene.util.BytesRef;
13+
import org.apache.lucene.util.UnicodeUtil;
14+
import org.elasticsearch.common.UUIDs;
15+
import org.openjdk.jmh.annotations.Benchmark;
16+
import org.openjdk.jmh.annotations.BenchmarkMode;
17+
import org.openjdk.jmh.annotations.Fork;
18+
import org.openjdk.jmh.annotations.Measurement;
19+
import org.openjdk.jmh.annotations.Mode;
20+
import org.openjdk.jmh.annotations.OutputTimeUnit;
21+
import org.openjdk.jmh.annotations.Param;
22+
import org.openjdk.jmh.annotations.Scope;
23+
import org.openjdk.jmh.annotations.Setup;
24+
import org.openjdk.jmh.annotations.State;
25+
import org.openjdk.jmh.annotations.Warmup;
26+
27+
import java.nio.ByteBuffer;
28+
import java.nio.charset.StandardCharsets;
29+
import java.util.concurrent.ThreadLocalRandom;
30+
import java.util.concurrent.TimeUnit;
31+
32+
@Warmup(iterations = 3)
33+
@Measurement(iterations = 3)
34+
@BenchmarkMode(Mode.AverageTime)
35+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
36+
@Fork(value = 1)
37+
public class UTF8StringBytesBenchmark {
38+
39+
@State(Scope.Thread)
40+
public static class StringState {
41+
@Param({ "uuid", "short", "long", "nonAscii", "veryLong" })
42+
String stringType;
43+
44+
String string;
45+
BytesRef bytes;
46+
47+
@Setup
48+
public void setup() {
49+
string = switch (stringType) {
50+
case "uuid" -> UUIDs.base64UUID();
51+
case "short" -> generateAsciiString(20);
52+
case "long" -> generateAsciiString(100);
53+
case "nonAscii" -> generateUTF8String(200);
54+
case "veryLong" -> generateAsciiString(1000);
55+
default -> throw new IllegalArgumentException("Unknown stringType: " + stringType);
56+
};
57+
bytes = getBytes(string);
58+
}
59+
}
60+
61+
@Benchmark
62+
public BytesRef getBytesJDK(StringState state) {
63+
byte[] bytes = state.string.getBytes(StandardCharsets.UTF_8);
64+
return new BytesRef(bytes, 0, bytes.length);
65+
}
66+
67+
@Benchmark
68+
public BytesRef getBytesUnicodeUtils(StringState state) {
69+
String string = state.string;
70+
int length = string.length();
71+
int size = UnicodeUtil.calcUTF16toUTF8Length(string, 0, length);
72+
byte[] out = new byte[size];
73+
UnicodeUtil.UTF16toUTF8(string, 0, length, out, 0);
74+
return new BytesRef(out, 0, out.length);
75+
}
76+
77+
@Benchmark
78+
public BytesRef getBytesByteBufferEncoder(StringState state) {
79+
var byteBuff = StandardCharsets.UTF_8.encode(state.string);
80+
assert byteBuff.hasArray();
81+
return new BytesRef(byteBuff.array(), byteBuff.arrayOffset() + byteBuff.position(), byteBuff.remaining());
82+
}
83+
84+
@Benchmark
85+
public String getStringJDK(StringState state) {
86+
BytesRef bytes = state.bytes;
87+
return new String(bytes.bytes, bytes.offset, bytes.length, StandardCharsets.UTF_8);
88+
}
89+
90+
@Benchmark
91+
public String getStringByteBufferDecoder(StringState state) {
92+
BytesRef bytes = state.bytes;
93+
var byteBuff = ByteBuffer.wrap(bytes.bytes, bytes.offset, bytes.length);
94+
return StandardCharsets.UTF_8.decode(byteBuff).toString();
95+
}
96+
97+
private static BytesRef getBytes(String string) {
98+
int before = ThreadLocalRandom.current().nextInt(0, 50);
99+
int after = ThreadLocalRandom.current().nextInt(0, 50);
100+
byte[] stringBytes = string.getBytes(StandardCharsets.UTF_8);
101+
byte[] finalBytes = new byte[before + after + stringBytes.length];
102+
System.arraycopy(stringBytes, 0, finalBytes, before, stringBytes.length);
103+
return new BytesRef(finalBytes, before, stringBytes.length);
104+
}
105+
106+
public static String generateAsciiString(int n) {
107+
ThreadLocalRandom random = ThreadLocalRandom.current();
108+
StringBuilder sb = new StringBuilder(n);
109+
110+
for (int i = 0; i < n; i++) {
111+
int ascii = random.nextInt(128);
112+
sb.append((char) ascii);
113+
}
114+
115+
return sb.toString();
116+
}
117+
118+
public static String generateUTF8String(int n) {
119+
ThreadLocalRandom random = ThreadLocalRandom.current();
120+
StringBuilder sb = new StringBuilder(n);
121+
122+
for (int i = 0; i < n; i++) {
123+
int codePoint;
124+
int probability = random.nextInt(100);
125+
126+
if (probability < 85) {
127+
// 1-byte UTF-8 (ASCII range)
128+
// 0x0000 to 0x007F
129+
codePoint = random.nextInt(0x0080);
130+
} else if (probability < 95) {
131+
// 2-byte UTF-8
132+
// 0x0080 to 0x07FF
133+
codePoint = random.nextInt(0x0080, 0x0800);
134+
} else {
135+
// 3-byte UTF-8
136+
// 0x0800 to 0xFFFF
137+
do {
138+
codePoint = random.nextInt(0x0800, 0x10000);
139+
// Skip surrogate pairs (0xD800-0xDFFF)
140+
} while (codePoint >= 0xD800 && codePoint <= 0xDFFF);
141+
}
142+
143+
sb.appendCodePoint(codePoint);
144+
}
145+
146+
return sb.toString();
147+
}
148+
}

benchmarks/src/main/java/org/elasticsearch/benchmark/script/ScriptScoreBenchmark.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
import org.apache.lucene.index.IndexWriter;
1616
import org.apache.lucene.index.IndexWriterConfig;
1717
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
18-
import org.apache.lucene.index.SortedNumericDocValues;
1918
import org.apache.lucene.search.IndexSearcher;
2019
import org.apache.lucene.search.MatchAllDocsQuery;
2120
import org.apache.lucene.search.Query;
@@ -29,6 +28,7 @@
2928
import org.elasticsearch.index.fielddata.FieldDataContext;
3029
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
3130
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
31+
import org.elasticsearch.index.fielddata.SortedNumericLongValues;
3232
import org.elasticsearch.index.mapper.IndexType;
3333
import org.elasticsearch.index.mapper.MappedFieldType;
3434
import org.elasticsearch.index.mapper.MappingLookup;
@@ -179,14 +179,14 @@ private ScoreScript.Factory bareMetalScript() {
179179
return new ScoreScript.LeafFactory() {
180180
@Override
181181
public ScoreScript newInstance(DocReader docReader) throws IOException {
182-
SortedNumericDocValues values = ifd.load(((DocValuesDocReader) docReader).getLeafReaderContext()).getLongValues();
182+
SortedNumericLongValues values = ifd.load(((DocValuesDocReader) docReader).getLeafReaderContext()).getLongValues();
183183
return new ScoreScript(params, null, docReader) {
184184
private int docId;
185185

186186
@Override
187187
public double execute(ExplanationHolder explanation) {
188188
try {
189-
values.advance(docId);
189+
values.advanceExact(docId);
190190
if (values.docValueCount() != 1) {
191191
throw new IllegalArgumentException("script only works when there is exactly one value");
192192
}

0 commit comments

Comments
 (0)