-
Notifications
You must be signed in to change notification settings - Fork 25.5k
Avoid rewrite round_to with expensive queries #135987
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
14c6551
Avoid rewrite round_to with expensive queries
dnhatn c9ce8b8
Update docs/changelog/135987.yaml
dnhatn b055ae0
Add fuzzy
dnhatn 2f7fd82
Add range with points
dnhatn e0ec420
harden tests
dnhatn 327f284
Merge remote-tracking branch 'dnhatn/round_to_expensive_queries' into…
dnhatn File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
pr: 135987 | ||
summary: Avoid rewrite `round_to` with expensive queries | ||
area: ES|QL | ||
type: bug | ||
issues: [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,18 @@ | |
|
||
package org.elasticsearch.xpack.esql.optimizer.rules.physical.local; | ||
|
||
import org.elasticsearch.index.IndexMode; | ||
import org.elasticsearch.index.query.BoolQueryBuilder; | ||
import org.elasticsearch.index.query.FuzzyQueryBuilder; | ||
import org.elasticsearch.index.query.MatchAllQueryBuilder; | ||
import org.elasticsearch.index.query.MatchNoneQueryBuilder; | ||
import org.elasticsearch.index.query.MultiTermQueryBuilder; | ||
import org.elasticsearch.index.query.PrefixQueryBuilder; | ||
import org.elasticsearch.index.query.QueryBuilder; | ||
import org.elasticsearch.index.query.RangeQueryBuilder; | ||
import org.elasticsearch.index.query.RegexpQueryBuilder; | ||
import org.elasticsearch.index.query.TermsQueryBuilder; | ||
import org.elasticsearch.index.query.WildcardQueryBuilder; | ||
import org.elasticsearch.logging.LogManager; | ||
import org.elasticsearch.logging.Logger; | ||
import org.elasticsearch.xpack.esql.core.expression.Alias; | ||
|
@@ -30,6 +41,8 @@ | |
import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec; | ||
import org.elasticsearch.xpack.esql.plan.physical.EvalExec; | ||
import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; | ||
import org.elasticsearch.xpack.esql.querydsl.query.SingleValueQuery; | ||
import org.elasticsearch.xpack.esql.stats.SearchStats; | ||
|
||
import java.time.ZoneId; | ||
import java.util.ArrayList; | ||
|
@@ -275,7 +288,12 @@ protected PhysicalPlan rule(EvalExec evalExec, LocalPhysicalOptimizerContext ctx | |
if (roundTos.size() == 1) { | ||
RoundTo roundTo = roundTos.get(0); | ||
int count = roundTo.points().size(); | ||
int roundingPointsUpperLimit = roundingPointsThreshold(ctx); | ||
int roundingPointsUpperLimit = adjustedRoundingPointsThreshold( | ||
ctx.searchStats(), | ||
roundingPointsThreshold(ctx), | ||
queryExec.query(), | ||
queryExec.indexMode() | ||
); | ||
if (count > roundingPointsUpperLimit) { | ||
logger.debug( | ||
"Skipping RoundTo push down for [{}], as it has [{}] points, which is more than [{}]", | ||
|
@@ -485,4 +503,63 @@ private int roundingPointsThreshold(LocalPhysicalOptimizerContext ctx) { | |
} | ||
return roundingPointsThreshold; | ||
} | ||
|
||
/** | ||
* If the main query is expensive (such as including wildcard queries), executing more queries with tags is slower and more costly | ||
* than executing fewer queries without tags and then reading points and rounding. The rounding points threshold is treated as the | ||
* maximum number of clauses allowed to execute. We estimate the number of clauses in the main query and adjust the threshold so | ||
* that the total number of clauses does not exceed the limit by too much. Some expensive queries count as more than one clause; | ||
* for example, a wildcard query counts as 5 clauses, and a terms query counts as the number of terms. | ||
*/ | ||
static int adjustedRoundingPointsThreshold(SearchStats stats, int threshold, QueryBuilder query, IndexMode indexMode) { | ||
int clauses = estimateQueryClauses(stats, query) + 1; | ||
if (indexMode == IndexMode.TIME_SERIES) { | ||
// No doc partitioning for time_series sources; increase the threshold to trade overhead for parallelism. | ||
threshold *= 2; | ||
} | ||
return Math.ceilDiv(threshold, clauses); | ||
} | ||
|
||
static int estimateQueryClauses(SearchStats stats, QueryBuilder q) { | ||
if (q == null || q instanceof MatchAllQueryBuilder || q instanceof MatchNoneQueryBuilder) { | ||
return 0; | ||
} | ||
if (q instanceof WildcardQueryBuilder | ||
|| q instanceof RegexpQueryBuilder | ||
|| q instanceof PrefixQueryBuilder | ||
|| q instanceof FuzzyQueryBuilder) { | ||
return 5; | ||
} | ||
if (q instanceof RangeQueryBuilder r) { | ||
// with points count 1, without count 3 | ||
return stats.min(new FieldAttribute.FieldName(r.fieldName())) != null ? 1 : 3; | ||
} | ||
if (q instanceof MultiTermQueryBuilder) { | ||
return 3; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we also score phrase queries differently? |
||
if (q instanceof TermsQueryBuilder terms && terms.values() != null) { | ||
return terms.values().size(); | ||
} | ||
if (q instanceof SingleValueQuery.Builder b) { | ||
// ignore the single_value clause | ||
return Math.max(1, estimateQueryClauses(stats, b.next())); | ||
} | ||
if (q instanceof BoolQueryBuilder bq) { | ||
int total = 0; | ||
for (var c : bq.filter()) { | ||
total += estimateQueryClauses(stats, c); | ||
} | ||
for (var c : bq.must()) { | ||
total += estimateQueryClauses(stats, c); | ||
} | ||
for (var c : bq.should()) { | ||
total += estimateQueryClauses(stats, c); | ||
} | ||
for (var c : bq.mustNot()) { | ||
total += Math.max(2, estimateQueryClauses(stats, c)); | ||
} | ||
return total; | ||
} | ||
return 1; | ||
} | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Super nit: conside adding constants for these numbers (2, 5 etc).