-
Notifications
You must be signed in to change notification settings - Fork 178
Implement reverse performance optimization
#4056
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 12 commits
bd75b7c
6cc5a69
a5b3df8
5ab39f9
3a44688
4343b47
7fabbe6
3ff4fa5
d4d70c5
dff4522
2501ed4
980482d
409bfa9
e8c5478
8bca222
a16c2c5
63a1f80
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,117 @@ | ||
| /* | ||
| * Copyright OpenSearch Contributors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package org.opensearch.sql.calcite.rule; | ||
|
|
||
| import org.apache.calcite.plan.RelOptRule; | ||
| import org.apache.calcite.plan.RelOptRuleCall; | ||
| import org.apache.calcite.rel.logical.LogicalSort; | ||
| import org.apache.logging.log4j.LogManager; | ||
| import org.apache.logging.log4j.Logger; | ||
| import org.opensearch.sql.calcite.plan.LogicalSystemLimit; | ||
|
|
||
| /** Combines sort then reverse into 1 sort. */ | ||
|
||
| public class SortReverseOptimizationRule extends RelOptRule { | ||
dai-chen marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| private static final Logger LOG = LogManager.getLogger(SortReverseOptimizationRule.class); | ||
|
|
||
| public static final SortReverseOptimizationRule INSTANCE = new SortReverseOptimizationRule(); | ||
|
|
||
| private SortReverseOptimizationRule() { | ||
| super( | ||
| operand(LogicalSort.class, operand(LogicalSort.class, any())), | ||
| "SortReverseOptimizationRule"); | ||
| } | ||
|
|
||
| @Override | ||
| public boolean matches(RelOptRuleCall call) { | ||
|
||
| LogicalSort outerSort = call.rel(0); | ||
| LogicalSort innerSort = call.rel(1); | ||
|
|
||
| LOG.debug("SortReverseOptimizationRule.matches() called"); | ||
| LOG.debug("Outer sort: {}", outerSort); | ||
| LOG.debug("Inner sort: {}", innerSort); | ||
| LOG.debug("Inner sort input: {}", innerSort.getInput()); | ||
dai-chen marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| // Don't optimize if outer sort is a LogicalSystemLimit | ||
| if (call.rel(0) instanceof LogicalSystemLimit) { | ||
| LOG.debug("Skipping: outer sort is LogicalSystemLimit"); | ||
| return false; | ||
| } | ||
|
|
||
| // Don't optimize if inner sort has a fetch limit (head/limit before sort) | ||
| // This preserves limit-then-sort semantics | ||
dai-chen marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if (innerSort.fetch != null) { | ||
| LOG.debug("Skipping: inner sort has fetch limit: {}", innerSort.fetch); | ||
| return false; | ||
| } | ||
|
|
||
| // Must be same field with opposite directions (sort | reverse pattern) | ||
| boolean matches = hasSameFieldWithOppositeDirection(outerSort, innerSort); | ||
| LOG.debug("Same field with opposite direction: {}", matches); | ||
| return matches; | ||
| } | ||
|
|
||
| @Override | ||
| public void onMatch(RelOptRuleCall call) { | ||
| LogicalSort outerSort = call.rel(0); | ||
| LogicalSort innerSort = call.rel(1); | ||
|
|
||
| LOG.debug("SortReverseOptimizationRule.onMatch() applying transformation"); | ||
| LOG.debug("Transforming from: {} -> {}", outerSort, innerSort); | ||
|
|
||
| LogicalSort optimizedSort = | ||
| LogicalSort.create( | ||
| innerSort.getInput(), outerSort.getCollation(), outerSort.offset, outerSort.fetch); | ||
|
|
||
| LOG.debug("Transformed to: {}", optimizedSort); | ||
| call.transformTo(optimizedSort); | ||
| } | ||
|
|
||
| private boolean hasSameFieldWithOppositeDirection(LogicalSort outerSort, LogicalSort innerSort) { | ||
| var outerFields = outerSort.getCollation().getFieldCollations(); | ||
| var innerFields = innerSort.getCollation().getFieldCollations(); | ||
|
|
||
| if (outerFields.isEmpty() || innerFields.isEmpty()) { | ||
| LOG.debug("No field collations found"); | ||
| return false; | ||
| } | ||
|
|
||
| // Must have same number of fields | ||
| if (outerFields.size() != innerFields.size()) { | ||
| LOG.debug( | ||
| "Different number of sort fields: outer={}, inner={}", | ||
| outerFields.size(), | ||
| innerFields.size()); | ||
| return false; | ||
| } | ||
|
|
||
| // Check all fields have same index but opposite directions | ||
| for (int i = 0; i < outerFields.size(); i++) { | ||
| var outerField = outerFields.get(i); | ||
| var innerField = innerFields.get(i); | ||
|
|
||
| LOG.debug( | ||
| "Field {}: outer(index={}, direction={}), inner(index={}, direction={})", | ||
| i, | ||
| outerField.getFieldIndex(), | ||
| outerField.getDirection(), | ||
| innerField.getFieldIndex(), | ||
| innerField.getDirection()); | ||
|
|
||
| if (outerField.getFieldIndex() != innerField.getFieldIndex() | ||
| || outerField.getDirection() == innerField.getDirection()) { | ||
| LOG.debug( | ||
| "Field {} mismatch: same index={}, opposite direction={}", | ||
| i, | ||
| outerField.getFieldIndex() == innerField.getFieldIndex(), | ||
| outerField.getDirection() != innerField.getDirection()); | ||
| return false; | ||
| } | ||
| } | ||
|
|
||
| LOG.debug("All fields match with opposite directions"); | ||
| return true; | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So we assume the head of this collation list must come from the
sortcommand right beforereversecommand right? Is it always true or the reverse logic below works even though it's not true?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For Q2: Take query
... | sort | X | Y | Z | reversefor example, we should be good as long as commands in-between (X, Y, Z) doesn't rely on the output order ofsortcommand, right? Just thinking any counterexample, such as... | sort | head | reverse?I'm thinking is it too early to do this in
visitReverse? Essentially, ourCalciteRelNodeVisitorconverts AST to its logical representation by RelNode. Just wondering what does it look like if we simply translate reverse to logical sort operator here?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great question about edge cases! I tested both
| sort | head 10 | reverseand| sort | eval | head | reversepatterns and they work correctly. In the first part of visitReverse the SORT ASC and LIMIT for head are combined intoLogicalSort(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10])and then the reverseCollation is added withLogicalSort(sort0=[$0], dir0=[DESC-nulls-last]). The physical plan pushes SORT ASC + LIMIT N to OpenSearch, then applies DESC sort on the limited results. Let me know if there are any other edge cases, Thanks!