Skip to content

Commit ec0adfb

Browse files
authored
Merge branch 'main' into writerWithOffset
2 parents b00af73 + ac6c7a9 commit ec0adfb

File tree

189 files changed

+7015
-2678
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

189 files changed

+7015
-2678
lines changed

BUILDING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ To wire this registered cluster into a `TestClusterAware` task (e.g. `RestIntegT
144144
Additional integration tests for certain Elasticsearch modules that are specific to certain cluster configurations can be declared in a separate so-called `qa` subproject of your module.
145145

146146
The benefits of a dedicated project for these tests are:
147-
- `qa` projects are dedicated two specific use-cases and easier to maintain
147+
- `qa` projects are dedicated to specific use-cases and easier to maintain
148148
- It keeps the specific test logic separated from the common test logic.
149149
- You can run those tests in parallel to other projects of the build.
150150

benchmarks/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ dependencies {
4242
api(project(':libs:h3'))
4343
api(project(':modules:aggregations'))
4444
api(project(':x-pack:plugin:esql-core'))
45+
api(project(':x-pack:plugin:core'))
4546
api(project(':x-pack:plugin:esql'))
4647
api(project(':x-pack:plugin:esql:compute'))
4748
implementation project(path: ':libs:simdvec')

benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.lucene.util.NumericUtils;
2626
import org.elasticsearch.common.breaker.NoopCircuitBreaker;
2727
import org.elasticsearch.common.lucene.Lucene;
28+
import org.elasticsearch.common.settings.Settings;
2829
import org.elasticsearch.common.util.BigArrays;
2930
import org.elasticsearch.compute.data.BlockFactory;
3031
import org.elasticsearch.compute.data.BytesRefBlock;
@@ -50,6 +51,7 @@
5051
import org.elasticsearch.index.mapper.MappedFieldType;
5152
import org.elasticsearch.index.mapper.NumberFieldMapper;
5253
import org.elasticsearch.search.lookup.SearchLookup;
54+
import org.elasticsearch.xpack.esql.plugin.EsqlPlugin;
5355
import org.openjdk.jmh.annotations.Benchmark;
5456
import org.openjdk.jmh.annotations.BenchmarkMode;
5557
import org.openjdk.jmh.annotations.Fork;
@@ -335,7 +337,7 @@ public void benchmark() {
335337
fields(name),
336338
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> {
337339
throw new UnsupportedOperationException("can't load _source here");
338-
})),
340+
}, EsqlPlugin.STORED_FIELDS_SEQUENTIAL_PROPORTION.getDefault(Settings.EMPTY))),
339341
0
340342
);
341343
long sum = 0;
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.benchmark.esql;
11+
12+
import org.elasticsearch.common.logging.LogConfigurator;
13+
import org.elasticsearch.common.settings.Settings;
14+
import org.elasticsearch.index.IndexMode;
15+
import org.elasticsearch.license.XPackLicenseState;
16+
import org.elasticsearch.xpack.esql.analysis.Analyzer;
17+
import org.elasticsearch.xpack.esql.analysis.AnalyzerContext;
18+
import org.elasticsearch.xpack.esql.analysis.EnrichResolution;
19+
import org.elasticsearch.xpack.esql.analysis.Verifier;
20+
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
21+
import org.elasticsearch.xpack.esql.core.type.EsField;
22+
import org.elasticsearch.xpack.esql.core.util.DateUtils;
23+
import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry;
24+
import org.elasticsearch.xpack.esql.index.EsIndex;
25+
import org.elasticsearch.xpack.esql.index.IndexResolution;
26+
import org.elasticsearch.xpack.esql.inference.InferenceResolution;
27+
import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext;
28+
import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer;
29+
import org.elasticsearch.xpack.esql.parser.EsqlParser;
30+
import org.elasticsearch.xpack.esql.parser.QueryParams;
31+
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
32+
import org.elasticsearch.xpack.esql.plugin.EsqlPlugin;
33+
import org.elasticsearch.xpack.esql.plugin.QueryPragmas;
34+
import org.elasticsearch.xpack.esql.session.Configuration;
35+
import org.elasticsearch.xpack.esql.telemetry.Metrics;
36+
import org.elasticsearch.xpack.esql.telemetry.PlanTelemetry;
37+
import org.openjdk.jmh.annotations.Benchmark;
38+
import org.openjdk.jmh.annotations.BenchmarkMode;
39+
import org.openjdk.jmh.annotations.Fork;
40+
import org.openjdk.jmh.annotations.Measurement;
41+
import org.openjdk.jmh.annotations.Mode;
42+
import org.openjdk.jmh.annotations.OutputTimeUnit;
43+
import org.openjdk.jmh.annotations.Scope;
44+
import org.openjdk.jmh.annotations.Setup;
45+
import org.openjdk.jmh.annotations.State;
46+
import org.openjdk.jmh.annotations.Warmup;
47+
import org.openjdk.jmh.infra.Blackhole;
48+
49+
import java.util.LinkedHashMap;
50+
import java.util.Locale;
51+
import java.util.Map;
52+
import java.util.concurrent.TimeUnit;
53+
54+
import static java.util.Collections.emptyMap;
55+
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT;
56+
57+
@Fork(1)
58+
@Warmup(iterations = 5)
59+
@Measurement(iterations = 10)
60+
@BenchmarkMode(Mode.AverageTime)
61+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
62+
@State(Scope.Benchmark)
63+
public class QueryPlanningBenchmark {
64+
65+
static {
66+
LogConfigurator.configureESLogging();
67+
}
68+
69+
private PlanTelemetry telemetry;
70+
private EsqlParser parser;
71+
private Analyzer analyzer;
72+
private LogicalPlanOptimizer optimizer;
73+
74+
@Setup
75+
public void setup() {
76+
77+
var config = new Configuration(
78+
DateUtils.UTC,
79+
Locale.US,
80+
null,
81+
null,
82+
new QueryPragmas(Settings.EMPTY),
83+
EsqlPlugin.QUERY_RESULT_TRUNCATION_MAX_SIZE.getDefault(Settings.EMPTY),
84+
EsqlPlugin.QUERY_RESULT_TRUNCATION_DEFAULT_SIZE.getDefault(Settings.EMPTY),
85+
"",
86+
false,
87+
Map.of(),
88+
System.nanoTime(),
89+
false
90+
);
91+
92+
var fields = 10_000;
93+
var mapping = LinkedHashMap.<String, EsField>newLinkedHashMap(fields);
94+
for (int i = 0; i < fields; i++) {
95+
mapping.put("field" + i, new EsField("field-" + i, TEXT, emptyMap(), true));
96+
}
97+
98+
var esIndex = new EsIndex("test", mapping, Map.of("test", IndexMode.STANDARD));
99+
100+
var functionRegistry = new EsqlFunctionRegistry();
101+
102+
telemetry = new PlanTelemetry(functionRegistry);
103+
parser = new EsqlParser();
104+
analyzer = new Analyzer(
105+
new AnalyzerContext(
106+
config,
107+
functionRegistry,
108+
IndexResolution.valid(esIndex),
109+
Map.of(),
110+
new EnrichResolution(),
111+
InferenceResolution.EMPTY
112+
),
113+
new Verifier(new Metrics(functionRegistry), new XPackLicenseState(() -> 0L))
114+
);
115+
optimizer = new LogicalPlanOptimizer(new LogicalOptimizerContext(config, FoldContext.small()));
116+
}
117+
118+
private LogicalPlan plan(String query) {
119+
var parsed = parser.createStatement(query, new QueryParams(), telemetry);
120+
var analyzed = analyzer.analyze(parsed);
121+
var optimized = optimizer.optimize(analyzed);
122+
return optimized;
123+
}
124+
125+
@Benchmark
126+
public void run(Blackhole blackhole) {
127+
blackhole.consume(plan("FROM test | LIMIT 10"));
128+
}
129+
}
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
package org.elasticsearch.benchmark.vector;
10+
11+
import org.apache.lucene.index.VectorSimilarityFunction;
12+
import org.apache.lucene.store.Directory;
13+
import org.apache.lucene.store.IOContext;
14+
import org.apache.lucene.store.IndexInput;
15+
import org.apache.lucene.store.IndexOutput;
16+
import org.apache.lucene.store.MMapDirectory;
17+
import org.apache.lucene.util.VectorUtil;
18+
import org.apache.lucene.util.quantization.OptimizedScalarQuantizer;
19+
import org.elasticsearch.common.logging.LogConfigurator;
20+
import org.elasticsearch.simdvec.internal.vectorization.ES91OSQVectorsScorer;
21+
import org.elasticsearch.simdvec.internal.vectorization.ESVectorizationProvider;
22+
import org.openjdk.jmh.annotations.Benchmark;
23+
import org.openjdk.jmh.annotations.BenchmarkMode;
24+
import org.openjdk.jmh.annotations.Fork;
25+
import org.openjdk.jmh.annotations.Measurement;
26+
import org.openjdk.jmh.annotations.Mode;
27+
import org.openjdk.jmh.annotations.OutputTimeUnit;
28+
import org.openjdk.jmh.annotations.Param;
29+
import org.openjdk.jmh.annotations.Scope;
30+
import org.openjdk.jmh.annotations.Setup;
31+
import org.openjdk.jmh.annotations.State;
32+
import org.openjdk.jmh.annotations.Warmup;
33+
import org.openjdk.jmh.infra.Blackhole;
34+
35+
import java.io.IOException;
36+
import java.nio.file.Files;
37+
import java.util.Random;
38+
import java.util.concurrent.TimeUnit;
39+
40+
@BenchmarkMode(Mode.Throughput)
41+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
42+
@State(Scope.Benchmark)
43+
// first iteration is complete garbage, so make sure we really warmup
44+
@Warmup(iterations = 4, time = 1)
45+
// real iterations. not useful to spend tons of time here, better to fork more
46+
@Measurement(iterations = 5, time = 1)
47+
// engage some noise reduction
48+
@Fork(value = 1)
49+
public class OSQScorerBenchmark {
50+
51+
static {
52+
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
53+
}
54+
55+
@Param({ "1024" })
56+
int dims;
57+
58+
int length;
59+
60+
int numVectors = ES91OSQVectorsScorer.BULK_SIZE * 10;
61+
int numQueries = 10;
62+
63+
byte[][] binaryVectors;
64+
byte[][] binaryQueries;
65+
OptimizedScalarQuantizer.QuantizationResult result;
66+
float centroidDp;
67+
68+
byte[] scratch;
69+
ES91OSQVectorsScorer scorer;
70+
71+
IndexInput in;
72+
73+
float[] scratchScores;
74+
float[] corrections;
75+
76+
@Setup
77+
public void setup() throws IOException {
78+
Random random = new Random(123);
79+
80+
this.length = OptimizedScalarQuantizer.discretize(dims, 64) / 8;
81+
82+
binaryVectors = new byte[numVectors][length];
83+
for (byte[] binaryVector : binaryVectors) {
84+
random.nextBytes(binaryVector);
85+
}
86+
87+
Directory dir = new MMapDirectory(Files.createTempDirectory("vectorData"));
88+
IndexOutput out = dir.createOutput("vectors", IOContext.DEFAULT);
89+
byte[] correctionBytes = new byte[14 * ES91OSQVectorsScorer.BULK_SIZE];
90+
for (int i = 0; i < numVectors; i += ES91OSQVectorsScorer.BULK_SIZE) {
91+
for (int j = 0; j < ES91OSQVectorsScorer.BULK_SIZE; j++) {
92+
out.writeBytes(binaryVectors[i + j], 0, binaryVectors[i + j].length);
93+
}
94+
random.nextBytes(correctionBytes);
95+
out.writeBytes(correctionBytes, 0, correctionBytes.length);
96+
}
97+
out.close();
98+
in = dir.openInput("vectors", IOContext.DEFAULT);
99+
100+
binaryQueries = new byte[numVectors][4 * length];
101+
for (byte[] binaryVector : binaryVectors) {
102+
random.nextBytes(binaryVector);
103+
}
104+
result = new OptimizedScalarQuantizer.QuantizationResult(
105+
random.nextFloat(),
106+
random.nextFloat(),
107+
random.nextFloat(),
108+
Short.toUnsignedInt((short) random.nextInt())
109+
);
110+
centroidDp = random.nextFloat();
111+
112+
scratch = new byte[length];
113+
scorer = ESVectorizationProvider.getInstance().newES91OSQVectorsScorer(in, dims);
114+
scratchScores = new float[16];
115+
corrections = new float[3];
116+
}
117+
118+
@Benchmark
119+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
120+
public void scoreFromArray(Blackhole bh) throws IOException {
121+
for (int j = 0; j < numQueries; j++) {
122+
in.seek(0);
123+
for (int i = 0; i < numVectors; i++) {
124+
in.readBytes(scratch, 0, length);
125+
float qDist = VectorUtil.int4BitDotProduct(binaryQueries[j], scratch);
126+
in.readFloats(corrections, 0, corrections.length);
127+
int addition = Short.toUnsignedInt(in.readShort());
128+
float score = scorer.score(
129+
result,
130+
VectorSimilarityFunction.EUCLIDEAN,
131+
centroidDp,
132+
corrections[0],
133+
corrections[1],
134+
addition,
135+
corrections[2],
136+
qDist
137+
);
138+
bh.consume(score);
139+
}
140+
}
141+
}
142+
143+
@Benchmark
144+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
145+
public void scoreFromMemorySegmentOnlyVector(Blackhole bh) throws IOException {
146+
for (int j = 0; j < numQueries; j++) {
147+
in.seek(0);
148+
for (int i = 0; i < numVectors; i++) {
149+
float qDist = scorer.quantizeScore(binaryQueries[j]);
150+
in.readFloats(corrections, 0, corrections.length);
151+
int addition = Short.toUnsignedInt(in.readShort());
152+
float score = scorer.score(
153+
result,
154+
VectorSimilarityFunction.EUCLIDEAN,
155+
centroidDp,
156+
corrections[0],
157+
corrections[1],
158+
addition,
159+
corrections[2],
160+
qDist
161+
);
162+
bh.consume(score);
163+
}
164+
}
165+
}
166+
167+
@Benchmark
168+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
169+
public void scoreFromMemorySegmentOnlyVectorBulk(Blackhole bh) throws IOException {
170+
for (int j = 0; j < numQueries; j++) {
171+
in.seek(0);
172+
for (int i = 0; i < numVectors; i += 16) {
173+
scorer.quantizeScoreBulk(binaryQueries[j], ES91OSQVectorsScorer.BULK_SIZE, scratchScores);
174+
for (int k = 0; k < ES91OSQVectorsScorer.BULK_SIZE; k++) {
175+
in.readFloats(corrections, 0, corrections.length);
176+
int addition = Short.toUnsignedInt(in.readShort());
177+
float score = scorer.score(
178+
result,
179+
VectorSimilarityFunction.EUCLIDEAN,
180+
centroidDp,
181+
corrections[0],
182+
corrections[1],
183+
addition,
184+
corrections[2],
185+
scratchScores[k]
186+
);
187+
bh.consume(score);
188+
}
189+
}
190+
}
191+
}
192+
193+
@Benchmark
194+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
195+
public void scoreFromMemorySegmentAllBulk(Blackhole bh) throws IOException {
196+
for (int j = 0; j < numQueries; j++) {
197+
in.seek(0);
198+
for (int i = 0; i < numVectors; i += 16) {
199+
scorer.scoreBulk(binaryQueries[j], result, VectorSimilarityFunction.EUCLIDEAN, centroidDp, scratchScores);
200+
bh.consume(scratchScores);
201+
}
202+
}
203+
}
204+
}

0 commit comments

Comments
 (0)