Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ dependencies {
api(project(':libs:h3'))
api(project(':modules:aggregations'))
api(project(':x-pack:plugin:esql-core'))
api(project(':x-pack:plugin:core'))
api(project(':x-pack:plugin:esql'))
api(project(':x-pack:plugin:esql:compute'))
implementation project(path: ':libs:simdvec')
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.benchmark.esql;

import org.elasticsearch.common.logging.LogConfigurator;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.license.XPackLicenseState;
import org.elasticsearch.xpack.esql.analysis.Analyzer;
import org.elasticsearch.xpack.esql.analysis.AnalyzerContext;
import org.elasticsearch.xpack.esql.analysis.EnrichResolution;
import org.elasticsearch.xpack.esql.analysis.Verifier;
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.type.EsField;
import org.elasticsearch.xpack.esql.core.util.DateUtils;
import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry;
import org.elasticsearch.xpack.esql.index.EsIndex;
import org.elasticsearch.xpack.esql.index.IndexResolution;
import org.elasticsearch.xpack.esql.inference.InferenceResolution;
import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext;
import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer;
import org.elasticsearch.xpack.esql.parser.EsqlParser;
import org.elasticsearch.xpack.esql.parser.QueryParams;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
import org.elasticsearch.xpack.esql.plugin.EsqlPlugin;
import org.elasticsearch.xpack.esql.plugin.QueryPragmas;
import org.elasticsearch.xpack.esql.session.Configuration;
import org.elasticsearch.xpack.esql.telemetry.Metrics;
import org.elasticsearch.xpack.esql.telemetry.PlanTelemetry;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import static java.util.Collections.emptyMap;
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT;

@Fork(1)
@Warmup(iterations = 5)
@Measurement(iterations = 10)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
public class QueryPlanningBenchmark {

// Runs once when this benchmark class is initialized: sets up Elasticsearch
// logging via LogConfigurator before any planning component is constructed.
static {
LogConfigurator.configureESLogging();
}

// Planning components shared by the benchmark methods. All four are assigned
// in setup() and read by plan(): the parser (with telemetry) produces the
// parsed statement, the analyzer resolves it, and the optimizer rewrites it.
private PlanTelemetry telemetry;
private EsqlParser parser;
private Analyzer analyzer;
private LogicalPlanOptimizer optimizer;

@Setup
public void setup() {

var config = new Configuration(
DateUtils.UTC,
Locale.US,
null,
null,
new QueryPragmas(Settings.EMPTY),
EsqlPlugin.QUERY_RESULT_TRUNCATION_MAX_SIZE.getDefault(Settings.EMPTY),
EsqlPlugin.QUERY_RESULT_TRUNCATION_DEFAULT_SIZE.getDefault(Settings.EMPTY),
"",
false,
Map.of(),
System.nanoTime(),
false
);

var fields = 10_000;
var mapping = LinkedHashMap.<String, EsField>newLinkedHashMap(fields);
for (int i = 0; i < fields; i++) {
mapping.put("field" + i, new EsField("field-" + i, TEXT, emptyMap(), true));

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For planning purposes, do we care that all fields are TEXT? Really asking - are field types something else we should vary?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You have to start somewhere I suppose. I don't believe the types make a big difference for the query being tested in this case.

It's pretty normal to build parameters on these things so you run it like:

./gradlew -p benchmarks run --args 'QueryPlanning -pfield_count=10000 -pfield_config=long -pquery="FROM test | EVAL a = field1 + field2"'

Or something. There's a bit of tension here because the default if you run it without any parameters is to run it with the combinatorial explosion of all of a configured set of parameters - and that can be trouble. We're expecting to be a bit more selective on how we run these than that. But, yeah, I think this PR is a good start and can tell us a lot as is.

}

var esIndex = new EsIndex("test", mapping, Map.of("test", IndexMode.STANDARD));

var functionRegistry = new EsqlFunctionRegistry();

telemetry = new PlanTelemetry(functionRegistry);
parser = new EsqlParser();
analyzer = new Analyzer(
new AnalyzerContext(
config,
functionRegistry,
IndexResolution.valid(esIndex),
Map.of(),
new EnrichResolution(),
InferenceResolution.EMPTY
),
new Verifier(new Metrics(functionRegistry), new XPackLicenseState(() -> 0L))
);
optimizer = new LogicalPlanOptimizer(new LogicalOptimizerContext(config, FoldContext.small()));
}

private LogicalPlan plan(String query) {
var parsed = parser.createStatement(query, new QueryParams(), telemetry);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am puzzled why telemetry is passed as a method argument (even though it is a stateless service like) and AnalyzerContext is passed to Analyzer via constructor even though it is query specific and prevents us from reusing Analyzer instance across queries, also similar with LogicalOptimizerContext and LogicalPlanOptimizer.

Could somebody share the history behind that?
If we changed it, it would also make it easier to add benchmarks, e.g. with plan(context, "FROM test | LIMIT 10")

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am puzzled why telemetry is passed as a method argument (even though it is a stateless service like)

The PlanTelemetry object gathers metrics specific to each query. If the query succeeds, the metrics are pushed through the service late, in a listener. If it fails, just one counter is bumped.

AnalyzerContext is passed to Analyzer via constructor even though it is query specific and prevents us from reusing Analyzer instance across queries

I think this is required by the inheritance / hierarchy, the context being set in a parent class ParameterizedRuleExecutor, so that any executor rules that are context specific can have guaranteed access to it.

var analyzed = analyzer.analyze(parsed);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if we care, but this literally can be a single line:
return optimizer.optimize(analyzer.analyze(parser.createStatement(query, new QueryParams(), telemetry)));

var optimized = optimizer.optimize(analyzed);
return optimized;
}

/**
 * Benchmark entry point: plans the fixed query {@code FROM test | LIMIT 10}
 * through {@link #plan(String)} and passes the resulting plan to the
 * supplied {@link Blackhole}.
 *
 * @param blackhole JMH sink that consumes the planned query
 */
@Benchmark
public void run(Blackhole blackhole) {
    var optimizedPlan = plan("FROM test | LIMIT 10");
    blackhole.consume(optimizedPlan);
}
}