Skip to content

Commit 2fd160b

Browse files
authored
Avoid rewrite round_to with expensive queries (#135987) (#135992)
Today, we use a threshold (defaults to 128) to avoid generating too many sub-queries when replacing round_to with sub-queries. However, we do not account for cases where the main query is expensive. In such cases, running many expensive queries is slower and more costly than running a single query and then reading the values and rounding them. Our benchmark shows that the following query takes 800ms with query-and-tags, but only 40ms without it:

TS metric* | WHERE host.name LIKE "host-*" AND @timestamp >= "2025-07-25T12:55:59.000Z" AND @timestamp <= "2025-07-25T17:25:59.000Z" | STATS AVG(AVG_OVER_TIME(`metrics.system.cpu.load_average.1m`)) BY host.name, TBUCKET(5 minutes)

And the time for this query:

TS new_metrics* | WHERE host.name IN("host-0", "host-1", "host-2") AND @timestamp >= "2025-07-25T12:55:59.000Z" AND @timestamp <= "2025-07-25T17:25:59.000Z" | STATS AVG(AVG_OVER_TIME(`metrics.system.cpu.load_average.1m`)) BY host.name, TBUCKET(5 minutes)

drops from 50ms to 10ms. This change proposes treating the threshold as a budget of query clauses and assigning higher weights to expensive queries, such as wildcard or prefix queries. This allows us to disable the rewrite when it is less efficient, while still enabling it when the number of sub-queries is small.
1 parent 307cd0d commit 2fd160b

File tree

3 files changed

+153
-1
lines changed

3 files changed

+153
-1
lines changed

docs/changelog/135987.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 135987
2+
summary: Avoid rewrite `round_to` with expensive queries
3+
area: ES|QL
4+
type: bug
5+
issues: []

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/ReplaceRoundToWithQueryAndTags.java

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,18 @@
77

88
package org.elasticsearch.xpack.esql.optimizer.rules.physical.local;
99

10+
import org.elasticsearch.index.IndexMode;
11+
import org.elasticsearch.index.query.BoolQueryBuilder;
12+
import org.elasticsearch.index.query.FuzzyQueryBuilder;
13+
import org.elasticsearch.index.query.MatchAllQueryBuilder;
14+
import org.elasticsearch.index.query.MatchNoneQueryBuilder;
15+
import org.elasticsearch.index.query.MultiTermQueryBuilder;
16+
import org.elasticsearch.index.query.PrefixQueryBuilder;
1017
import org.elasticsearch.index.query.QueryBuilder;
18+
import org.elasticsearch.index.query.RangeQueryBuilder;
19+
import org.elasticsearch.index.query.RegexpQueryBuilder;
20+
import org.elasticsearch.index.query.TermsQueryBuilder;
21+
import org.elasticsearch.index.query.WildcardQueryBuilder;
1122
import org.elasticsearch.logging.LogManager;
1223
import org.elasticsearch.logging.Logger;
1324
import org.elasticsearch.xpack.esql.core.expression.Alias;
@@ -30,6 +41,8 @@
3041
import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec;
3142
import org.elasticsearch.xpack.esql.plan.physical.EvalExec;
3243
import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan;
44+
import org.elasticsearch.xpack.esql.querydsl.query.SingleValueQuery;
45+
import org.elasticsearch.xpack.esql.stats.SearchStats;
3346

3447
import java.time.ZoneId;
3548
import java.util.ArrayList;
@@ -275,7 +288,12 @@ protected PhysicalPlan rule(EvalExec evalExec, LocalPhysicalOptimizerContext ctx
275288
if (roundTos.size() == 1) {
276289
RoundTo roundTo = roundTos.get(0);
277290
int count = roundTo.points().size();
278-
int roundingPointsUpperLimit = roundingPointsThreshold(ctx);
291+
int roundingPointsUpperLimit = adjustedRoundingPointsThreshold(
292+
ctx.searchStats(),
293+
roundingPointsThreshold(ctx),
294+
queryExec.query(),
295+
queryExec.indexMode()
296+
);
279297
if (count > roundingPointsUpperLimit) {
280298
logger.debug(
281299
"Skipping RoundTo push down for [{}], as it has [{}] points, which is more than [{}]",
@@ -485,4 +503,63 @@ private int roundingPointsThreshold(LocalPhysicalOptimizerContext ctx) {
485503
}
486504
return roundingPointsThreshold;
487505
}
506+
507+
/**
508+
* If the main query is expensive (such as including wildcard queries), executing more queries with tags is slower and more costly
509+
* than executing fewer queries without tags and then reading points and rounding. The rounding points threshold is treated as the
510+
* maximum number of clauses allowed to execute. We estimate the number of clauses in the main query and adjust the threshold so
511+
* that the total number of clauses does not exceed the limit by too much. Some expensive queries count as more than one clause;
512+
* for example, a wildcard query counts as 5 clauses, and a terms query counts as the number of terms.
513+
*/
514+
static int adjustedRoundingPointsThreshold(SearchStats stats, int threshold, QueryBuilder query, IndexMode indexMode) {
515+
int clauses = estimateQueryClauses(stats, query) + 1;
516+
if (indexMode == IndexMode.TIME_SERIES) {
517+
// No doc partitioning for time_series sources; increase the threshold to trade overhead for parallelism.
518+
threshold *= 2;
519+
}
520+
return Math.ceilDiv(threshold, clauses);
521+
}
522+
523+
static int estimateQueryClauses(SearchStats stats, QueryBuilder q) {
524+
if (q == null || q instanceof MatchAllQueryBuilder || q instanceof MatchNoneQueryBuilder) {
525+
return 0;
526+
}
527+
if (q instanceof WildcardQueryBuilder
528+
|| q instanceof RegexpQueryBuilder
529+
|| q instanceof PrefixQueryBuilder
530+
|| q instanceof FuzzyQueryBuilder) {
531+
return 5;
532+
}
533+
if (q instanceof RangeQueryBuilder r) {
534+
// with points count 1, without count 3
535+
return stats.min(new FieldAttribute.FieldName(r.fieldName())) != null ? 1 : 3;
536+
}
537+
if (q instanceof MultiTermQueryBuilder) {
538+
return 3;
539+
}
540+
if (q instanceof TermsQueryBuilder terms && terms.values() != null) {
541+
return terms.values().size();
542+
}
543+
if (q instanceof SingleValueQuery.Builder b) {
544+
// ignore the single_value clause
545+
return Math.max(1, estimateQueryClauses(stats, b.next()));
546+
}
547+
if (q instanceof BoolQueryBuilder bq) {
548+
int total = 0;
549+
for (var c : bq.filter()) {
550+
total += estimateQueryClauses(stats, c);
551+
}
552+
for (var c : bq.must()) {
553+
total += estimateQueryClauses(stats, c);
554+
}
555+
for (var c : bq.should()) {
556+
total += estimateQueryClauses(stats, c);
557+
}
558+
for (var c : bq.mustNot()) {
559+
total += Math.max(2, estimateQueryClauses(stats, c));
560+
}
561+
return total;
562+
}
563+
return 1;
564+
}
488565
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/ReplaceRoundToWithQueryAndTagsTests.java

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
import java.util.Locale;
5454
import java.util.Map;
5555
import java.util.stream.Collectors;
56+
import java.util.stream.IntStream;
5657

5758
import static org.elasticsearch.compute.aggregation.AggregatorMode.FINAL;
5859
import static org.elasticsearch.compute.aggregation.AggregatorMode.SINGLE;
@@ -67,6 +68,7 @@
6768
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.DEFAULT_DATE_TIME_FORMATTER;
6869
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.dateNanosToLong;
6970
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.dateTimeToLong;
71+
import static org.hamcrest.Matchers.equalTo;
7072
import static org.hamcrest.Matchers.is;
7173

7274
//@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE", reason = "debug")
@@ -537,6 +539,74 @@ public void testRoundToTransformToQueryAndTagsWithCustomizedUpperLimit() {
537539
}
538540
}
539541

542+
static String pointArray(int numPoints) {
543+
return IntStream.range(0, numPoints).mapToObj(Integer::toString).collect(Collectors.joining(","));
544+
}
545+
546+
static int queryAndTags(PhysicalPlan plan) {
547+
EsQueryExec esQuery = (EsQueryExec) plan.collectFirstChildren(EsQueryExec.class::isInstance).getFirst();
548+
return esQuery.queryBuilderAndTags().size();
549+
}
550+
551+
public void testAdjustThresholdForQueries() {
552+
{
553+
int points = between(2, 127);
554+
String q = String.format(Locale.ROOT, """
555+
from test
556+
| stats count(*) by x = round_to(integer, %s)
557+
""", pointArray(points));
558+
PhysicalPlan plan = plannerOptimizer.plan(q, searchStats, makeAnalyzer("mapping-all-types.json"));
559+
int queryAndTags = queryAndTags(plan);
560+
assertThat(queryAndTags, equalTo(points + 1)); // include null bucket
561+
}
562+
{
563+
int points = between(2, 64);
564+
String q = String.format(Locale.ROOT, """
565+
from test
566+
| where date >= "2023-10-19"
567+
| stats count(*) by x = round_to(integer, %s)
568+
""", pointArray(points));
569+
var plan = plannerOptimizer.plan(q, searchStats, makeAnalyzer("mapping-all-types.json"));
570+
int queryAndTags = queryAndTags(plan);
571+
assertThat(queryAndTags, equalTo(points + 1)); // include null bucket
572+
}
573+
{
574+
int points = between(65, 128);
575+
String q = String.format(Locale.ROOT, """
576+
from test
577+
| where date >= "2023-10-19"
578+
| stats count(*) by x = round_to(integer, %s)
579+
""", pointArray(points));
580+
var plan = plannerOptimizer.plan(q, searchStats, makeAnalyzer("mapping-all-types.json"));
581+
int queryAndTags = queryAndTags(plan);
582+
assertThat(queryAndTags, equalTo(1)); // no rewrite
583+
}
584+
{
585+
int points = between(2, 19);
586+
String q = String.format(Locale.ROOT, """
587+
from test
588+
| where date >= "2023-10-19"
589+
| where keyword LIKE "w*"
590+
| stats count(*) by x = round_to(integer, %s)
591+
""", pointArray(points));
592+
var plan = plannerOptimizer.plan(q, searchStats, makeAnalyzer("mapping-all-types.json"));
593+
int queryAndTags = queryAndTags(plan);
594+
assertThat("points=" + points, queryAndTags, equalTo(points + 1)); // include null bucket
595+
}
596+
{
597+
int points = between(20, 128);
598+
String q = String.format(Locale.ROOT, """
599+
from test
600+
| where date >= "2023-10-19"
601+
| where keyword LIKE "*w*"
602+
| stats count(*) by x = round_to(integer, %s)
603+
""", pointArray(points));
604+
PhysicalPlan plan = plannerOptimizer.plan(q, searchStats, makeAnalyzer("mapping-all-types.json"));
605+
int queryAndTags = queryAndTags(plan);
606+
assertThat("points=" + points, queryAndTags, equalTo(1)); // no rewrite
607+
}
608+
}
609+
540610
private static void verifyQueryAndTags(List<EsQueryExec.QueryBuilderAndTags> expected, List<EsQueryExec.QueryBuilderAndTags> actual) {
541611
assertEquals(expected.size(), actual.size());
542612
for (int i = 0; i < expected.size(); i++) {

0 commit comments

Comments
 (0)