Skip to content
6 changes: 6 additions & 0 deletions docs/changelog/131833.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 131833
summary: "ESQL: Remove redundant `TopN`"
area: ES|QL
type: enhancement
issues:
- 131233
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,12 @@ protected static Batch<LogicalPlan> operators(boolean local) {
}

/**
 * Builds the batch of logical-plan rules named "Clean Up".
 * <p>
 * This diff hunk shows two versions of the return statement: the single-line one appears to be
 * the pre-change line, and the multi-line one its replacement, which additionally lists
 * {@code PruneRedundantOrderBy} directly after {@code ReplaceLimitAndSortAsTopN}.
 *
 * @return a new {@code Batch} holding the clean-up rules in the order they are listed
 */
protected static Batch<LogicalPlan> cleanup() {
return new Batch<>("Clean Up", new ReplaceLimitAndSortAsTopN(), new ReplaceRowAsLocalRelation(), new PropgateUnmappedFields());
return new Batch<>(
"Clean Up",
new ReplaceLimitAndSortAsTopN(),
// NOTE(review): presumably listed after ReplaceLimitAndSortAsTopN so that it can see the
// TopN nodes that rule produces - confirm against the rule's implementation.
new PruneRedundantOrderBy(),
new ReplaceRowAsLocalRelation(),
new PropgateUnmappedFields()
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public class PruneRedundantOrderBy extends OptimizerRules.OptimizerRule<LogicalP
@Override
protected LogicalPlan rule(LogicalPlan plan) {
if (plan instanceof OrderBy || plan instanceof TopN || plan instanceof Aggregate) {
Set<OrderBy> redundant = findRedundantSort(((UnaryPlan) plan).child());
Set<LogicalPlan> redundant = findRedundantSort(plan);
if (redundant.isEmpty()) {
return plan;
}
Expand All @@ -58,25 +58,32 @@ protected LogicalPlan rule(LogicalPlan plan) {
* Iterative depth-first traversal (the deque is used as a stack) to find redundant SORTs in the children tree.
* Returns an identity set (we need to compare and prune the exact instances)
*/
private Set<OrderBy> findRedundantSort(LogicalPlan plan) {
Set<OrderBy> result = Collections.newSetFromMap(new IdentityHashMap<>());
private Set<LogicalPlan> findRedundantSort(LogicalPlan plan) {
Set<LogicalPlan> result = Collections.newSetFromMap(new IdentityHashMap<>());

Deque<LogicalPlan> toCheck = new ArrayDeque<>();
toCheck.push(plan);
toCheck.push(((UnaryPlan) plan).child());

while (true) {
if (toCheck.isEmpty()) {
return result;
}
while (toCheck.isEmpty() == false) {
LogicalPlan p = toCheck.pop();
if (p instanceof OrderBy ob) {
result.add(ob);
toCheck.push(ob.child());
} else if (p instanceof TopN childTopN && plan instanceof TopN parentTopN) {
// Check if a child TopN is redundant compared to a parent TopN.
// A child TopN is redundant if it has the same sort order as the parent.
// We do not need to compare their limits because `PushDownAndCombineLimits`
// has already pushed down the lower limit value.
if (childTopN.order().equals(parentTopN.order())) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Caution: `equals` will currently still return true even if we order by two attributes that have the same name and type but not the same NameId, i.e. attributes that mean something else.

This is a different bug that needs fixing separately, but I wanted to point out that currently it will prevent this from rightfully triggering in a couple of situations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I’m currently encountering this bug while resolving another issue. This is sad 😭

image

Does fixing this bug touch too much code, which is why it's hard to fix?

result.add(childTopN);
toCheck.push(childTopN.child());
}
} else if (p instanceof SortAgnostic) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new version of the rule is incorrect, I'm afraid.

We continue going down the plan as long as the children are SortAgnostic. However, being SortAgnostic doesn't automatically imply that the plans behave the same when we feed them an unlimited number of rows.

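For instance, INLINE STATS (resp. InlineJoin) is SortAgnostic. But removing a TopN from before an INLINE STATS changes the meaning of the query, because the stats would then be computed over all rows instead of only the rows the TopN lets through.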

Also, queries can become much more expensive: for instance, MvExpand is SortAgnostic, but SORT a | LIMIT 10 | MV_EXPAND b | SORT a | LIMIT 10 is generally cheaper to run than just MV_EXPAND b | SORT a | LIMIT 10.

for (LogicalPlan child : p.children()) {
toCheck.push(child);
}
}
}
return result;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7983,6 +7983,49 @@ public void testPruneRedundantOrderBy() {
as(mvExpand2.child(), Row.class);
}

/**
 * Two back-to-back {@code SORT first_name | LIMIT n} pairs (limits 100 and 10) are expected to
 * end up as a single {@code TopN} placed directly on top of the relation:
 * <pre>{@code
 * EsqlProject[[first_name{f}#6]]
 * \_TopN[[Order[first_name{f}#6,ASC,LAST]],10[INTEGER]]
 *   \_EsRelation[test][_meta_field{f}#11, emp_no{f}#5, first_name{f}#6, ge..]
 * }</pre>
 */
public void testPruneRedundantTopN() {
    String query = """
        FROM test
        | KEEP first_name
        | SORT first_name
        | LIMIT 100
        | SORT first_name
        | LIMIT 10
        """;
    Project projection = as(optimizedPlan(query), Project.class);
    TopN remainingTopN = as(projection.child(), TopN.class);
    // Only one TopN may sit between the projection and the relation.
    as(remainingTopN.child(), EsRelation.class);
}

/**
 * Same sort key on both {@code SORT ... | LIMIT} pairs (limits 10 and 100), separated by
 * {@code drop} and {@code keep} commands. The expected plan still holds a single {@code TopN}
 * directly above the relation:
 * <pre>{@code
 * EsqlProject[[first_name{f}#7]]
 * \_TopN[[Order[first_name{f}#7,ASC,LAST]],10[INTEGER]]
 *   \_EsRelation[test][_meta_field{f}#12, emp_no{f}#6, first_name{f}#7, ge..]
 * }</pre>
 */
public void testPruneRedundantTopNWithSortAgnosticsInBetween() {
    String query = """
        FROM test
        | SORT first_name
        | LIMIT 10
        | drop last_name
        | keep first_name
        | SORT first_name
        | LIMIT 100
        """;
    Project projection = as(optimizedPlan(query), Project.class);
    TopN remainingTopN = as(projection.child(), TopN.class);
    // Only one TopN may sit between the projection and the relation.
    as(remainingTopN.child(), EsRelation.class);
}

/**
* <pre>{@code
* Eval[[1[INTEGER] AS irrelevant1, 2[INTEGER] AS irrelevant2]]
Expand Down