From bd75b7cb0ced06bc142b434442bf7f9a728bee0a Mon Sep 17 00:00:00 2001 From: Selina Song Date: Mon, 11 Aug 2025 16:26:09 -0700 Subject: [PATCH 01/16] WIP: sort flip collation Signed-off-by: Selina Song --- .../sql/calcite/CalciteRelNodeVisitor.java | 38 +++++++---- .../rule/SortReverseOptimizationRule.java | 65 +++++++++++++++++++ .../sql/calcite/utils/PlanUtils.java | 58 +++++++---------- .../sql/calcite/remote/CalciteExplainIT.java | 33 +++++++++- .../remote/CalciteReverseCommandIT.java | 62 ++++++++++++++++-- 5 files changed, 205 insertions(+), 51 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/calcite/rule/SortReverseOptimizationRule.java diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 5261d438863..3346272dbef 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -38,6 +38,8 @@ import java.util.stream.Stream; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.ViewExpanders; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.JoinRelType; @@ -567,19 +569,29 @@ public RelNode visitHead(Head node, CalcitePlanContext context) { public RelNode visitReverse( org.opensearch.sql.ast.tree.Reverse node, CalcitePlanContext context) { visitChildren(node, context); - // Add ROW_NUMBER() column - RexNode rowNumber = - context - .relBuilder - .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) - .over() - .rowsTo(RexWindowBounds.CURRENT_ROW) - .as(REVERSE_ROW_NUM); - context.relBuilder.projectPlus(rowNumber); - // Sort by row number descending - context.relBuilder.sort(context.relBuilder.desc(context.relBuilder.field(REVERSE_ROW_NUM))); - // Remove row number column - context.relBuilder.projectExcept(context.relBuilder.field(REVERSE_ROW_NUM)); + + RelCollation collation = context.relBuilder.peek().getTraitSet().getCollation(); + if (collation == null || collation == RelCollations.EMPTY) { + // If no collation exists, use the traditional row_number approach + // Add ROW_NUMBER() column + RexNode rowNumber = + context + .relBuilder + .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) + .over() + .rowsTo(RexWindowBounds.CURRENT_ROW) + .as(REVERSE_ROW_NUM); + context.relBuilder.projectPlus(rowNumber); + // Sort by row number descending + context.relBuilder.sort(context.relBuilder.desc(context.relBuilder.field(REVERSE_ROW_NUM))); + // Remove row number column + context.relBuilder.projectExcept(context.relBuilder.field(REVERSE_ROW_NUM)); + } else { + // If collation exists, reverse the sort direction + RelCollation reversedCollation = PlanUtils.reverseCollation(collation); + context.relBuilder.sort(reversedCollation); + } + return context.relBuilder.peek(); } diff --git a/core/src/main/java/org/opensearch/sql/calcite/rule/SortReverseOptimizationRule.java b/core/src/main/java/org/opensearch/sql/calcite/rule/SortReverseOptimizationRule.java new file mode 100644 index 00000000000..c5461f8ddaa --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/rule/SortReverseOptimizationRule.java @@ -0,0 +1,65 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.rule; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.logical.LogicalSort; +import org.opensearch.sql.calcite.plan.LogicalSystemLimit; + +/** + * Optimization rule that eliminates redundant consecutive sorts on the same field. + * Detects: LogicalSort(field, direction1) -> LogicalSort(field, direction2) + * Converts to: LogicalSort(field, direction1) (keeps outer sort) + */ +public class SortReverseOptimizationRule extends RelOptRule { + + public static final SortReverseOptimizationRule INSTANCE = new SortReverseOptimizationRule(); + + private SortReverseOptimizationRule() { + super(operand(LogicalSort.class, + operand(LogicalSort.class, any())), + "SortReverseOptimizationRule"); + } + + @Override + public boolean matches(RelOptRuleCall call) { + LogicalSort outerSort = call.rel(0); + LogicalSort innerSort = call.rel(1); + + // Don't optimize if outer sort is a LogicalSystemLimit - we want to preserve system limits + if (call.rel(0) instanceof LogicalSystemLimit) { + return false; + } + + return hasSameField(outerSort, innerSort); + } + + @Override + public void onMatch(RelOptRuleCall call) { + LogicalSort outerSort = call.rel(0); + LogicalSort innerSort = call.rel(1); + + LogicalSort optimizedSort = LogicalSort.create( + innerSort.getInput(), + outerSort.getCollation(), + outerSort.offset, + outerSort.fetch); + + call.transformTo(optimizedSort); + } + + private boolean hasSameField(LogicalSort outerSort, LogicalSort innerSort) { + if (outerSort.getCollation().getFieldCollations().isEmpty() + || innerSort.getCollation().getFieldCollations().isEmpty()) { + return false; + } + + int outerField = outerSort.getCollation().getFieldCollations().get(0).getFieldIndex(); + int innerField = innerSort.getCollation().getFieldCollations().get(0).getFieldIndex(); + return outerField == innerField; + } +} \ No newline at end of file diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index f98aebd2e7c..056e5b38358 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -16,6 +16,9 @@ import java.util.List; import javax.annotation.Nullable; import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelHomogeneousShuttle; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelShuttle; @@ -355,40 +358,29 @@ static RexNode derefMapCall(RexNode rexNode) { return rexNode; } - /** Check if contains RexOver */ - static boolean containsRowNumberDedup(LogicalProject project) { - return project.getProjects().stream() - .anyMatch(p -> p instanceof RexOver && p.getKind() == SqlKind.ROW_NUMBER); - } + /** + * Reverses the direction of a RelCollation. + * + * @param original The original collation to reverse + * @return A new RelCollation with reversed directions + */ + public static RelCollation reverseCollation(RelCollation original) { + if (original == null || original.getFieldCollations().isEmpty()) { + return original; + } - /** Get all RexWindow list from LogicalProject */ - static List getRexWindowFromProject(LogicalProject project) { - final List res = new ArrayList<>(); - final RexVisitorImpl visitor = - new RexVisitorImpl<>(true) { - @Override - public Void visitOver(RexOver over) { - res.add(over.getWindow()); - return null; - } - }; - visitor.visitEach(project.getProjects()); - return res; - } + List reversedFields = new ArrayList<>(); + for (RelFieldCollation field : original.getFieldCollations()) { + RelFieldCollation.Direction reversedDirection = field.direction.reverse(); + + RelFieldCollation reversedField = new RelFieldCollation( + field.getFieldIndex(), + reversedDirection, + field.nullDirection + ); + reversedFields.add(reversedField); + } - static List getSelectColumns(List rexNodes) { - final List selectedColumns = new ArrayList<>(); - final RexVisitorImpl visitor = - new RexVisitorImpl(true) { - @Override - public Void visitInputRef(RexInputRef inputRef) { - if (!selectedColumns.contains(inputRef.getIndex())) { - selectedColumns.add(inputRef.getIndex()); - } - return null; - } - }; - visitor.visitEach(rexNodes); - return selectedColumns; + return RelCollations.of(reversedFields); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index d3a533a2649..a0d996379d4 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -207,7 +207,7 @@ public void testFilterFunctionScriptPushDownExplain() throws Exception { public void testExplainWithReverse() throws IOException { String result = executeWithReplace( - "explain source=opensearch-sql_test_index_account | sort age | reverse | head 5"); + "explain source=opensearch-sql_test_index_account | reverse | head 5"); // Verify that the plan contains a LogicalSort with fetch (from head 5) assertTrue(result.contains("LogicalSort") && result.contains("fetch=[5]")); @@ -216,6 +216,37 @@ public void testExplainWithReverse() throws IOException { assertTrue(result.contains("ROW_NUMBER()")); assertTrue(result.contains("dir0=[DESC]")); } + + @Test + public void testExplainWithReversePushdown() throws IOException { + // Test with a sort operation that should use the reverse pushdown optimization + String result = + executeWithReplace( + "explain source=opensearch-sql_test_index_account | sort - age | reverse"); + + // Verify that the plan contains a LogicalSort with ascending direction (reversed from DESC) + assertTrue(result.contains("LogicalSort")); + assertTrue(result.contains("dir0=[ASC]")); + + // Verify that ROW_NUMBER is NOT used (since we're using the collation-based optimization) + assertFalse(result.contains("ROW_NUMBER()")); + } + + @Test + public void testExplainWithReversePushdownMultipleFields() throws IOException { + // Test with multiple sort fields that should use the reverse pushdown optimization + String result = + executeWithReplace( + "explain source=opensearch-sql_test_index_account | sort - age, + firstname | reverse"); + + // Verify that the plan contains a LogicalSort with reversed directions + assertTrue(result.contains("LogicalSort")); + assertTrue(result.contains("dir0=[ASC]")); // age was DESC, now ASC + assertTrue(result.contains("dir1=[DESC]")); // firstname was ASC, now DESC + + // Verify that ROW_NUMBER is NOT used (since we're using the collation-based optimization) + assertFalse(result.contains("ROW_NUMBER()")); + } @Test public void testExplainWithTimechartAvg() throws IOException { diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java index 5ff41bcb3f5..f34b7fdf81f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java @@ -97,14 +97,68 @@ public void testReverseWithComplexPipeline() throws IOException { } @Test - public void testReverseWithMultipleSorts() throws IOException { - // Use the existing BANK data but with a simpler, more predictable query + public void testReverseWithDescendingSort() throws IOException { + // Test reverse with descending sort (- age) JSONObject result = executeQuery( String.format( - "source=%s | sort account_number | fields account_number | reverse | head 3", + "source=%s | sort - account_number | fields account_number | reverse", TEST_INDEX_BANK)); verifySchema(result, schema("account_number", "bigint")); - verifyDataRowsInOrder(result, rows(32), rows(25), rows(20)); + verifyDataRowsInOrder( + result, rows(1), rows(6), rows(13), rows(18), rows(20), rows(25), rows(32)); + } + + @Test + public void testReverseWithMixedSortDirections() throws IOException { + // Test reverse with mixed sort directions (- age, + firstname) + JSONObject result = + executeQuery( + String.format( + "source=%s | sort - account_number, + firstname | fields account_number, firstname | reverse", + TEST_INDEX_BANK)); + verifySchema(result, schema("account_number", "bigint"), schema("firstname", "string")); + verifyDataRowsInOrder( + result, + rows(1, "Amber JOHnny"), + rows(6, "Hattie"), + rows(13, "Nanette"), + rows(18, "Dale"), + rows(20, "Elinor"), + rows(25, "Virginia"), + rows(32, "Dillard")); + } + + @Test + public void testDoubleReverseWithDescendingSort() throws IOException { + // Test double reverse with descending sort (- age) + JSONObject result = + executeQuery( + String.format( + "source=%s | sort - account_number | fields account_number | reverse | reverse", + TEST_INDEX_BANK)); + verifySchema(result, schema("account_number", "bigint")); + verifyDataRowsInOrder( + result, rows(32), rows(25), rows(20), rows(18), rows(13), rows(6), rows(1)); + } + + @Test + public void testDoubleReverseWithMixedSortDirections() throws IOException { + // Test double reverse with mixed sort directions (- age, + firstname) + JSONObject result = + executeQuery( + String.format( + "source=%s | sort - account_number, + firstname | fields account_number, firstname | reverse | reverse", + TEST_INDEX_BANK)); + verifySchema(result, schema("account_number", "bigint"), schema("firstname", "string")); + verifyDataRowsInOrder( + result, + rows(32, "Dillard"), + rows(25, "Virginia"), + rows(20, "Elinor"), + rows(18, "Dale"), + rows(13, "Nanette"), + rows(6, "Hattie"), + rows(1, "Amber JOHnny")); } } From 6cc5a69195e0e02677dafa8d4a47e561d660a062 Mon Sep 17 00:00:00 2001 From: Selina Song Date: Mon, 18 Aug 2025 16:03:26 -0700 Subject: [PATCH 02/16] Sort collation implemented, added ExplainIT Signed-off-by: Selina Song --- .../sql/calcite/remote/CalciteExplainIT.java | 31 +++++-------------- .../explain_reverse_pushdown_multiple.json | 6 ++++ .../explain_reverse_pushdown_single.json | 6 ++++ 3 files changed, 20 insertions(+), 23 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.json diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index a0d996379d4..138c2882062 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -219,33 +219,18 @@ public void testExplainWithReverse() throws IOException { @Test public void testExplainWithReversePushdown() throws IOException { - // Test with a sort operation that should use the reverse pushdown optimization - String result = - executeWithReplace( - "explain source=opensearch-sql_test_index_account | sort - age | reverse"); - - // Verify that the plan contains a LogicalSort with ascending direction (reversed from DESC) - assertTrue(result.contains("LogicalSort")); - assertTrue(result.contains("dir0=[ASC]")); - - // Verify that ROW_NUMBER is NOT used (since we're using the collation-based optimization) - assertFalse(result.contains("ROW_NUMBER()")); + String query = "source=opensearch-sql_test_index_account | sort - age | reverse"; + var result = explainQueryToString(query); + String expected = loadFromFile("expectedOutput/calcite/explain_reverse_pushdown_single.json"); + assertJsonEqualsIgnoreId(expected, result); } @Test public void testExplainWithReversePushdownMultipleFields() throws IOException { - // Test with multiple sort fields that should use the reverse pushdown optimization - String result = - executeWithReplace( - "explain source=opensearch-sql_test_index_account | sort - age, + firstname | reverse"); - - // Verify that the plan contains a LogicalSort with reversed directions - assertTrue(result.contains("LogicalSort")); - assertTrue(result.contains("dir0=[ASC]")); // age was DESC, now ASC - assertTrue(result.contains("dir1=[DESC]")); // firstname was ASC, now DESC - - // Verify that ROW_NUMBER is NOT used (since we're using the collation-based optimization) - assertFalse(result.contains("ROW_NUMBER()")); + String query = "source=opensearch-sql_test_index_account | sort - age, + firstname | reverse"; + var result = explainQueryToString(query); + String expected = loadFromFile("expectedOutput/calcite/explain_reverse_pushdown_multiple.json"); + assertJsonEqualsIgnoreId(expected, result); } @Test diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.json new file mode 100644 index 00000000000..1a7e0cee6c7 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[ASC], dir1=[DESC], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC], dir1=[DESC])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{\n \"age\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}, {\n \"firstname.keyword\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_first\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_last\"}},{\"firstname.keyword\":{\"order\":\"desc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.json new file mode 100644 index 00000000000..da588569e1c --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(sort0=[$8], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], dir0=[ASC])\n LogicalSort(sort0=[$8], dir0=[DESC-nulls-last])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{\n \"age\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_last\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file From a5b3df8790190d49dd603d0d6fb1a562536aad04 Mon Sep 17 00:00:00 2001 From: Selina Song Date: Mon, 18 Aug 2025 16:04:25 -0700 Subject: [PATCH 03/16] spotless Signed-off-by: Selina Song --- .../opensearch/sql/calcite/CalciteRelNodeVisitor.java | 4 ++-- .../java/org/opensearch/sql/calcite/utils/PlanUtils.java | 9 +++------ .../opensearch/sql/calcite/remote/CalciteExplainIT.java | 7 +++---- .../sql/calcite/remote/CalciteReverseCommandIT.java | 6 ++++-- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 3346272dbef..f1d3f90eb7f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -569,7 +569,7 @@ public RelNode visitHead(Head node, CalcitePlanContext context) { public RelNode visitReverse( org.opensearch.sql.ast.tree.Reverse node, CalcitePlanContext context) { visitChildren(node, context); - + RelCollation collation = context.relBuilder.peek().getTraitSet().getCollation(); if (collation == null || collation == RelCollations.EMPTY) { // If no collation exists, use the traditional row_number approach @@ -591,7 +591,7 @@ public RelNode visitReverse( RelCollation reversedCollation = PlanUtils.reverseCollation(collation); context.relBuilder.sort(reversedCollation); } - + return context.relBuilder.peek(); } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index 056e5b38358..b5f44b1b54b 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -372,12 +372,9 @@ public static RelCollation reverseCollation(RelCollation original) { List reversedFields = new ArrayList<>(); for (RelFieldCollation field : original.getFieldCollations()) { RelFieldCollation.Direction reversedDirection = field.direction.reverse(); - - RelFieldCollation reversedField = new RelFieldCollation( - field.getFieldIndex(), - reversedDirection, - field.nullDirection - ); + + RelFieldCollation reversedField = + new RelFieldCollation(field.getFieldIndex(), reversedDirection, field.nullDirection); reversedFields.add(reversedField); } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 138c2882062..627bbad7e7f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -206,8 +206,7 @@ public void testFilterFunctionScriptPushDownExplain() throws Exception { @Test public void testExplainWithReverse() throws IOException { String result = - executeWithReplace( - "explain source=opensearch-sql_test_index_account | reverse | head 5"); + executeWithReplace("explain source=opensearch-sql_test_index_account | reverse | head 5"); // Verify that the plan contains a LogicalSort with fetch (from head 5) assertTrue(result.contains("LogicalSort") && result.contains("fetch=[5]")); @@ -216,7 +215,7 @@ public void testExplainWithReverse() throws IOException { assertTrue(result.contains("ROW_NUMBER()")); assertTrue(result.contains("dir0=[DESC]")); } - + @Test public void testExplainWithReversePushdown() throws IOException { String query = "source=opensearch-sql_test_index_account | sort - age | reverse"; @@ -224,7 +223,7 @@ public void testExplainWithReversePushdown() throws IOException { String expected = loadFromFile("expectedOutput/calcite/explain_reverse_pushdown_single.json"); assertJsonEqualsIgnoreId(expected, result); } - + @Test public void testExplainWithReversePushdownMultipleFields() throws IOException { String query = "source=opensearch-sql_test_index_account | sort - age, + firstname | reverse"; diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java index f34b7fdf81f..b9946f1b19d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java @@ -115,7 +115,8 @@ public void testReverseWithMixedSortDirections() throws IOException { JSONObject result = executeQuery( String.format( - "source=%s | sort - account_number, + firstname | fields account_number, firstname | reverse", + "source=%s | sort - account_number, + firstname | fields account_number, firstname" + + " | reverse", TEST_INDEX_BANK)); verifySchema(result, schema("account_number", "bigint"), schema("firstname", "string")); verifyDataRowsInOrder( @@ -148,7 +149,8 @@ public void testDoubleReverseWithMixedSortDirections() throws IOException { JSONObject result = executeQuery( String.format( - "source=%s | sort - account_number, + firstname | fields account_number, firstname | reverse | reverse", + "source=%s | sort - account_number, + firstname | fields account_number, firstname" + + " | reverse | reverse", TEST_INDEX_BANK)); verifySchema(result, schema("account_number", "bigint"), schema("firstname", "string")); verifyDataRowsInOrder( From 5ab39f9e02169f5b8c4ac7e1595596fef07935bb Mon Sep 17 00:00:00 2001 From: Selina Song Date: Mon, 18 Aug 2025 16:34:47 -0700 Subject: [PATCH 04/16] Add sort double reverse no op test Signed-off-by: Selina Song --- .../opensearch/sql/calcite/remote/CalciteExplainIT.java | 9 +++++++++ .../calcite/explain_double_reverse_sort_no_op.json | 6 ++++++ 2 files changed, 15 insertions(+) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_sort_no_op.json diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 627bbad7e7f..fa564bf3083 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -252,6 +252,15 @@ public void testExplainWithTimechartCount() throws IOException { assertJsonEqualsIgnoreId(expected, result); } + @Test + public void testDoubleReverseWithSortNoOp() throws IOException { + String query = + "source=opensearch-sql_test_index_account | sort - age, + firstname | reverse | reverse"; + var result = explainQueryToString(query); + String expected = loadFromFile("expectedOutput/calcite/explain_double_reverse_sort_no_op.json"); + assertJsonEqualsIgnoreId(expected, result); + } + @Test public void noPushDownForAggOnWindow() throws IOException { Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled()); diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_sort_no_op.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_sort_no_op.json new file mode 100644 index 00000000000..c45612927b1 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_sort_no_op.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{\n \"age\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_last\"\n }\n}, {\n \"firstname.keyword\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"desc\",\"missing\":\"_last\"}},{\"firstname.keyword\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file From 3a44688283c3e7aaecf6a777591f2ef7fefc1bbc Mon Sep 17 00:00:00 2001 From: Selina Song Date: Tue, 19 Aug 2025 09:15:46 -0700 Subject: [PATCH 05/16] update logical plan tests Signed-off-by: Selina Song --- .../ppl/calcite/CalcitePPLReverseTest.java | 76 ++++--------------- 1 file changed, 15 insertions(+), 61 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java index 179fb3bc830..81f65154d25 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java @@ -19,13 +19,8 @@ public void testReverseParserSuccess() { String ppl = "source=EMP | reverse"; RelNode root = getRelNode(ppl); String expectedLogical = - "" - + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7])\n" - + " LogicalSort(sort0=[$8], dir0=[DESC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(sort0=[$0], dir0=[DESC-nulls-last])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedResult = @@ -59,13 +54,7 @@ public void testReverseParserSuccess() { + " COMM=null; DEPTNO=20\n"; verifyResult(root, expectedResult); - String expectedSparkSql = - "" - + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__reverse_row_num__`\n" - + "FROM `scott`.`EMP`\n" - + "ORDER BY 9 DESC NULLS FIRST) `t0`"; + String expectedSparkSql = "SELECT *\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `EMPNO` DESC"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -74,24 +63,17 @@ public void testReverseWithSortParserSuccess() { String ppl = "source=EMP | sort ENAME | reverse"; RelNode root = getRelNode(ppl); String expectedLogical = - "" - + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7])\n" - + " LogicalSort(sort0=[$8], dir0=[DESC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" - + " LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(sort0=[$1], dir0=[DESC])\n" + + " LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "" - + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__reverse_row_num__`\n" + "SELECT *\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + "FROM `scott`.`EMP`\n" - + "ORDER BY `ENAME`) `t0`\n" - + "ORDER BY `__reverse_row_num__` DESC NULLS FIRST"; + + "ORDER BY `ENAME`) `t`\n" + + "ORDER BY `ENAME` DESC NULLS FIRST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -99,28 +81,10 @@ public void testReverseWithSortParserSuccess() { public void testDoubleReverseParserSuccess() { String ppl = "source=EMP | reverse | reverse"; RelNode root = getRelNode(ppl); - String expectedLogical = - "" - + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7])\n" - + " LogicalSort(sort0=[$8], dir0=[DESC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" - + " LogicalSort(sort0=[$8], dir0=[DESC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + String expectedLogical = "LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__reverse_row_num__`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__reverse_row_num__`\n" - + "FROM `scott`.`EMP`\n" - + "ORDER BY 9 DESC NULLS FIRST) `t0`\n" - + "ORDER BY 9 DESC NULLS FIRST) `t2`"; + String expectedSparkSql = "SELECT *\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -129,13 +93,8 @@ public void testReverseWithHeadParserSuccess() { String ppl = "source=EMP | reverse | head 2"; RelNode root = getRelNode(ppl); String expectedLogical = - "" - + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7])\n" - + " LogicalSort(sort0=[$8], dir0=[DESC], fetch=[2])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(sort0=[$0], dir0=[DESC-nulls-last], fetch=[2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedResult = @@ -146,12 +105,7 @@ public void testReverseWithHeadParserSuccess() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__reverse_row_num__`\n" - + "FROM `scott`.`EMP`\n" - + "ORDER BY 9 DESC NULLS FIRST\n" - + "LIMIT 2) `t0`"; + "SELECT *\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `EMPNO` DESC\n" + "LIMIT 2"; verifyPPLToSparkSQL(root, expectedSparkSql); } From 4343b47b9341e8e058ebdf1619ba733e5e79bbe8 Mon Sep 17 00:00:00 2001 From: Selina Song Date: Mon, 8 Sep 2025 14:41:12 -0700 Subject: [PATCH 06/16] add opt rule for sort flip Signed-off-by: Selina Song --- .../sql/calcite/CalciteRelNodeVisitor.java | 25 ++++++++-------- .../sql/calcite/plan/OpenSearchRules.java | 9 +++++- .../sql/calcite/plan/OpenSearchTableScan.java | 5 ++++ .../sql/calcite/utils/PlanUtils.java | 11 ++++++- .../sql/calcite/remote/CalciteExplainIT.java | 30 +++++++++++++++++++ .../calcite/explain_double_reverse_no_op.json | 6 ++++ .../calcite/explain_limit_then_sort_push.json | 2 +- .../explain_reverse_pushdown_multiple.json | 4 +-- .../explain_reverse_pushdown_single.json | 4 +-- .../scan/CalciteEnumerableIndexScan.java | 4 +++ .../ppl/calcite/CalcitePPLReverseTest.java | 29 +++++++++++++----- 11 files changed, 102 insertions(+), 27 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_no_op.json diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index f1d3f90eb7f..f3086a8d6f9 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -569,11 +569,18 @@ public RelNode visitHead(Head node, CalcitePlanContext context) { public RelNode visitReverse( org.opensearch.sql.ast.tree.Reverse node, CalcitePlanContext context) { visitChildren(node, context); - - RelCollation collation = context.relBuilder.peek().getTraitSet().getCollation(); - if (collation == null || collation == RelCollations.EMPTY) { - // If no collation exists, use the traditional row_number approach - // Add ROW_NUMBER() column + + // Check if there's an existing sort to reverse + List collations = + context.relBuilder.getCluster().getMetadataQuery().collations(context.relBuilder.peek()); + RelCollation collation = collations != null && !collations.isEmpty() ? collations.get(0) : null; + + if (collation != null && !collation.getFieldCollations().isEmpty()) { + // If there's an existing sort, reverse its direction + RelCollation reversedCollation = PlanUtils.reverseCollation(collation); + context.relBuilder.sort(reversedCollation); + } else { + // Fallback: use ROW_NUMBER approach when no existing sort RexNode rowNumber = context .relBuilder @@ -582,16 +589,10 @@ public RelNode visitReverse( .rowsTo(RexWindowBounds.CURRENT_ROW) .as(REVERSE_ROW_NUM); context.relBuilder.projectPlus(rowNumber); - // Sort by row number descending context.relBuilder.sort(context.relBuilder.desc(context.relBuilder.field(REVERSE_ROW_NUM))); - // Remove row number column context.relBuilder.projectExcept(context.relBuilder.field(REVERSE_ROW_NUM)); - } else { - // If collation exists, reverse the sort direction - RelCollation reversedCollation = PlanUtils.reverseCollation(collation); - context.relBuilder.sort(reversedCollation); } - + return context.relBuilder.peek(); } diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRules.java b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRules.java index 3f64e23493d..043f8066a48 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRules.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRules.java @@ -8,12 +8,19 @@ import com.google.common.collect.ImmutableList; import java.util.List; import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.rel.convert.ConverterRule; +import org.opensearch.sql.calcite.rule.SortReverseOptimizationRule; public class OpenSearchRules { private static final PPLAggregateConvertRule AGGREGATE_CONVERT_RULE = PPLAggregateConvertRule.Config.SUM_CONVERTER.toRule(); - public static final List OPEN_SEARCH_OPT_RULES = + public static final List OPEN_SEARCH_OPT_RULES = ImmutableList.of(); + + public static final List OPTIMIZATION_RULES = + ImmutableList.of(SortReverseOptimizationRule.INSTANCE); + + public static final List OPEN_SEARCH_OPT_RULES_OLD = ImmutableList.of(AGGREGATE_CONVERT_RULE); // prevent instantiation diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchTableScan.java b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchTableScan.java index 03678713ef4..8a613c57abe 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchTableScan.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchTableScan.java @@ -32,6 +32,11 @@ public void register(RelOptPlanner planner) { for (RelOptRule rule : OpenSearchRules.OPEN_SEARCH_OPT_RULES) { planner.addRule(rule); } + + // Register optimization rules + for (RelOptRule rule : OpenSearchRules.OPTIMIZATION_RULES) { + planner.addRule(rule); + } // remove this rule otherwise opensearch can't correctly interpret approx_count_distinct() // it is converted to cardinality aggregation in OpenSearch diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index b5f44b1b54b..343d16588df 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -372,9 +372,18 @@ public static RelCollation reverseCollation(RelCollation original) { List reversedFields = new ArrayList<>(); for (RelFieldCollation field : original.getFieldCollations()) { RelFieldCollation.Direction reversedDirection = field.direction.reverse(); + + // Handle null direction properly - reverse it as well + RelFieldCollation.NullDirection reversedNullDirection = field.nullDirection; + if (reversedNullDirection == RelFieldCollation.NullDirection.FIRST) { + reversedNullDirection = RelFieldCollation.NullDirection.LAST; + } else if (reversedNullDirection == RelFieldCollation.NullDirection.LAST) { + reversedNullDirection = RelFieldCollation.NullDirection.FIRST; + } + // UNSPECIFIED remains UNSPECIFIED RelFieldCollation reversedField = - new RelFieldCollation(field.getFieldIndex(), reversedDirection, field.nullDirection); + new RelFieldCollation(field.getFieldIndex(), reversedDirection, reversedNullDirection); reversedFields.add(reversedField); } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index fa564bf3083..987ff3b9a38 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -232,6 +232,7 @@ public void testExplainWithReversePushdownMultipleFields() throws IOException { assertJsonEqualsIgnoreId(expected, result); } +<<<<<<< HEAD @Test public void testExplainWithTimechartAvg() throws IOException { var result = explainQueryToString("source=events | timechart span=1m avg(cpu_usage) by host"); @@ -260,6 +261,35 @@ public void testDoubleReverseWithSortNoOp() throws IOException { String expected = loadFromFile("expectedOutput/calcite/explain_double_reverse_sort_no_op.json"); assertJsonEqualsIgnoreId(expected, result); } +======= +// @Test +// public void testDoubleReverseNoOp() throws IOException { +// String query = +// "source=opensearch-sql_test_index_account | fields account_number | reverse | reverse"; +// var result = explainQueryToString(query); +// String expected = loadFromFile("expectedOutput/calcite/explain_double_reverse_no_op.json"); +// assertJsonEqualsIgnoreId(expected, result); +// } + +// @Test +// public void testTripleReverseOneOp() throws IOException { +// String query = +// "source=opensearch-sql_test_index_account | fields account_number | reverse | reverse |" +// + " reverse"; +// var result = explainQueryToString(query); +// String expected = loadFromFile("expectedOutput/calcite/explain_triple_reverse_one_op.json"); +// assertJsonEqualsIgnoreId(expected, result); +// } +// +// @Test +// public void testDoubleReverseWithSortNoOp() throws IOException { +// String query = +// "source=opensearch-sql_test_index_account | sort - age, + firstname | reverse | reverse"; +// var result = explainQueryToString(query); +// String expected = loadFromFile("expectedOutput/calcite/explain_double_reverse_sort_no_op.json"); +// assertJsonEqualsIgnoreId(expected, result); +// } +>>>>>>> f6a6803f3 (add opt rule for sort flip) @Test public void noPushDownForAggOnWindow() throws IOException { diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_no_op.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_no_op.json new file mode 100644 index 00000000000..d4e60c4b843 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_no_op.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(cpu_usage=[$0], @timestamp=[$1])\n LogicalSort(sort0=[$8], dir0=[DESC])\n LogicalProject(cpu_usage=[$0], @timestamp=[$1], _id=[$2], _index=[$3], _score=[$4], _maxscore=[$5], _sort=[$6], _routing=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n LogicalSort(sort0=[$8], dir0=[DESC])\n LogicalProject(cpu_usage=[$0], @timestamp=[$1], _id=[$2], _index=[$3], _score=[$4], _maxscore=[$5], _sort=[$6], _routing=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n CalciteLogicalIndexScan(table=[[OpenSearch, events]])\n", + "physical": "EnumerableCalc(expr#0..3=[{inputs}], proj#0..1=[{exprs}])\n EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$3], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableSort(sort0=[$2], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[cpu_usage, @timestamp]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"cpu_usage\",\"@timestamp\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_then_sort_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_then_sort_push.json index f73adcb19a9..ebe7930c2d3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_then_sort_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_then_sort_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age=[$8])\n LogicalSort(sort0=[$8], dir0=[ASC-nulls-first])\n LogicalSort(fetch=[5])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$0], dir0=[ASC-nulls-first])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[LIMIT->5, PROJECT->[age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"],\"excludes\":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableSort(sort0=[$0], dir0=[ASC-nulls-first])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[LIMIT->5, PROJECT->[age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"],\"excludes\":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.json index 1a7e0cee6c7..19afc66a6aa 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.json @@ -1,6 +1,6 @@ { "calcite": { - "logical": "LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[ASC], dir1=[DESC], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC], dir1=[DESC])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{\n \"age\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}, {\n \"firstname.keyword\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_first\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_last\"}},{\"firstname.keyword\":{\"order\":\"desc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "logical": "LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{\n \"age\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}, {\n \"firstname.keyword\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_last\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}},{\"firstname.keyword\":{\"order\":\"desc\",\"missing\":\"_last\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.json index da588569e1c..cf3c7f40ce3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.json @@ -1,6 +1,6 @@ { "calcite": { - "logical": "LogicalSystemLimit(sort0=[$8], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], dir0=[ASC])\n LogicalSort(sort0=[$8], dir0=[DESC-nulls-last])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{\n \"age\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_last\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "logical": "LogicalSystemLimit(sort0=[$8], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], dir0=[ASC-nulls-first])\n LogicalSort(sort0=[$8], dir0=[DESC-nulls-last])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{\n \"age\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java index 33b80f61982..b3d0a587ef5 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java @@ -73,6 +73,10 @@ public void register(RelOptPlanner planner) { for (RelOptRule rule : OpenSearchRules.OPEN_SEARCH_OPT_RULES) { planner.addRule(rule); } + + for (RelOptRule rule : OpenSearchRules.OPTIMIZATION_RULES) { + planner.addRule(rule); + } // remove this rule otherwise opensearch can't correctly interpret approx_count_distinct() // it is converted to cardinality aggregation in OpenSearch diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java index 81f65154d25..1c8482c756c 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java @@ -19,7 +19,7 @@ public void testReverseParserSuccess() { String ppl = "source=EMP | reverse"; RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalSort(sort0=[$0], dir0=[DESC-nulls-last])\n" + "LogicalSort(sort0=[$0], dir0=[DESC])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); @@ -54,7 +54,7 @@ public void testReverseParserSuccess() { + " COMM=null; DEPTNO=20\n"; verifyResult(root, expectedResult); - String expectedSparkSql = "SELECT *\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `EMPNO` DESC"; + String expectedSparkSql = "SELECT *\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `EMPNO` DESC NULLS FIRST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -62,8 +62,9 @@ public void testReverseParserSuccess() { public void testReverseWithSortParserSuccess() { String ppl = "source=EMP | sort ENAME | reverse"; RelNode root = getRelNode(ppl); + // Optimization rule may show double sorts in logical plan but physical execution is optimized String expectedLogical = - "LogicalSort(sort0=[$1], dir0=[DESC])\n" + "LogicalSort(sort0=[$1], dir0=[DESC-nulls-last])\n" + " LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); @@ -73,7 +74,7 @@ public void testReverseWithSortParserSuccess() { + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `ENAME`) `t`\n" - + "ORDER BY `ENAME` DESC NULLS FIRST"; + + "ORDER BY `ENAME` DESC"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -81,10 +82,22 @@ public void testReverseWithSortParserSuccess() { public void testDoubleReverseParserSuccess() { String ppl = "source=EMP | reverse | reverse"; RelNode root = getRelNode(ppl); - String expectedLogical = "LogicalTableScan(table=[[scott, EMP]])\n"; + // TODO: fix logical plan to be no-op with opt rule + String expectedLogical = + "LogicalSort(sort0=[$8], dir0=[DESC])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" + + " LogicalSort(sort0=[$8], dir0=[DESC])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedSparkSql = "SELECT *\n" + "FROM `scott`.`EMP`"; + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, ROW_NUMBER() OVER () AS `__reverse_row_num__`\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, ROW_NUMBER() OVER () AS `__reverse_row_num__`\n" + + "FROM `scott`.`EMP`) `t`\n" + + "ORDER BY `__reverse_row_num__` DESC) `t0`\n" + + "ORDER BY `__reverse_row_num__` DESC"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -93,7 +106,7 @@ public void testReverseWithHeadParserSuccess() { String ppl = "source=EMP | reverse | head 2"; RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalSort(sort0=[$0], dir0=[DESC-nulls-last], fetch=[2])\n" + "LogicalSort(sort0=[$0], dir0=[DESC], fetch=[2])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); @@ -105,7 +118,7 @@ public void testReverseWithHeadParserSuccess() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT *\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `EMPNO` DESC\n" + "LIMIT 2"; + "SELECT *\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `EMPNO` DESC NULLS FIRST\n" + "LIMIT 2"; verifyPPLToSparkSQL(root, expectedSparkSql); } From 7fabbe6932532d0d3b4539311d5cb899145e2831 Mon Sep 17 00:00:00 2001 From: Selina Song Date: Mon, 8 Sep 2025 14:56:01 -0700 Subject: [PATCH 07/16] restore planutils after merge Signed-off-by: Selina Song --- .../sql/calcite/utils/PlanUtils.java | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index 343d16588df..233c5cd6885 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -358,6 +358,43 @@ static RexNode derefMapCall(RexNode rexNode) { return rexNode; } + /** Check if contains RexOver */ + static boolean containsRowNumberDedup(LogicalProject project) { + return project.getProjects().stream() + .anyMatch(p -> p instanceof RexOver && p.getKind() == SqlKind.ROW_NUMBER); + } + + /** Get all RexWindow list from LogicalProject */ + static List getRexWindowFromProject(LogicalProject project) { + final List res = new ArrayList<>(); + final RexVisitorImpl visitor = + new RexVisitorImpl<>(true) { + @Override + public Void visitOver(RexOver over) { + res.add(over.getWindow()); + return null; + } + }; + visitor.visitEach(project.getProjects()); + return res; + } + + static List getSelectColumns(List rexNodes) { + final List selectedColumns = new ArrayList<>(); + final RexVisitorImpl visitor = + new RexVisitorImpl(true) { + @Override + public Void visitInputRef(RexInputRef inputRef) { + if (!selectedColumns.contains(inputRef.getIndex())) { + selectedColumns.add(inputRef.getIndex()); + } + return null; + } + }; + visitor.visitEach(rexNodes); + return selectedColumns; + } + /** * Reverses the direction of a RelCollation. * From 3ff4fa552a271398e75853d817302dfa96476720 Mon Sep 17 00:00:00 2001 From: Selina Song Date: Mon, 8 Sep 2025 16:36:35 -0700 Subject: [PATCH 08/16] fix conflict with aggregate opt rule Signed-off-by: Selina Song --- .../sql/calcite/CalciteRelNodeVisitor.java | 9 +- .../sql/calcite/plan/OpenSearchRules.java | 13 ++- .../sql/calcite/plan/OpenSearchTableScan.java | 7 +- .../rule/SortReverseOptimizationRule.java | 86 +++++++++++++------ .../sql/calcite/utils/PlanUtils.java | 2 +- .../sql/calcite/remote/CalciteExplainIT.java | 39 --------- .../explain_agg_with_sum_enhancement.json | 2 +- .../calcite/explain_limit_then_sort_push.json | 4 +- .../scan/CalciteEnumerableIndexScan.java | 2 +- .../ppl/calcite/CalcitePPLReverseTest.java | 27 +++--- 10 files changed, 93 insertions(+), 98 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index f3086a8d6f9..072300853c3 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -39,7 +39,6 @@ import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.ViewExpanders; import org.apache.calcite.rel.RelCollation; -import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.JoinRelType; @@ -569,12 +568,12 @@ public RelNode visitHead(Head node, CalcitePlanContext context) { public RelNode visitReverse( org.opensearch.sql.ast.tree.Reverse node, CalcitePlanContext context) { visitChildren(node, context); - + // Check if there's an existing sort to reverse - List collations = + List collations = context.relBuilder.getCluster().getMetadataQuery().collations(context.relBuilder.peek()); RelCollation collation = collations != null && !collations.isEmpty() ? collations.get(0) : null; - + if (collation != null && !collation.getFieldCollations().isEmpty()) { // If there's an existing sort, reverse its direction RelCollation reversedCollation = PlanUtils.reverseCollation(collation); @@ -592,7 +591,7 @@ public RelNode visitReverse( context.relBuilder.sort(context.relBuilder.desc(context.relBuilder.field(REVERSE_ROW_NUM))); context.relBuilder.projectExcept(context.relBuilder.field(REVERSE_ROW_NUM)); } - + return context.relBuilder.peek(); } diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRules.java b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRules.java index 043f8066a48..a563f6e9fa4 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRules.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRules.java @@ -8,21 +8,20 @@ import com.google.common.collect.ImmutableList; import java.util.List; import org.apache.calcite.plan.RelOptRule; -import org.apache.calcite.rel.convert.ConverterRule; import org.opensearch.sql.calcite.rule.SortReverseOptimizationRule; public class OpenSearchRules { private static final PPLAggregateConvertRule AGGREGATE_CONVERT_RULE = PPLAggregateConvertRule.Config.SUM_CONVERTER.toRule(); - public static final List OPEN_SEARCH_OPT_RULES = ImmutableList.of(); - - public static final List OPTIMIZATION_RULES = - ImmutableList.of(SortReverseOptimizationRule.INSTANCE); - - public static final List OPEN_SEARCH_OPT_RULES_OLD = + public static final List OPEN_SEARCH_OPT_RULES = ImmutableList.of(AGGREGATE_CONVERT_RULE); + public static final List OPTIMIZATION_RULES = ImmutableList.of(); + + public static final List OPEN_SEARCH_POST_AGG_RULES = + ImmutableList.of(SortReverseOptimizationRule.INSTANCE); + // prevent instantiation private OpenSearchRules() {} } diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchTableScan.java b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchTableScan.java index 8a613c57abe..da4804747ea 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchTableScan.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchTableScan.java @@ -32,12 +32,17 @@ public void register(RelOptPlanner planner) { for (RelOptRule rule : OpenSearchRules.OPEN_SEARCH_OPT_RULES) { planner.addRule(rule); } - + // Register optimization rules for (RelOptRule rule : OpenSearchRules.OPTIMIZATION_RULES) { planner.addRule(rule); } + // Register post-aggregation rules (run after aggregate optimizations) + for (RelOptRule rule : OpenSearchRules.OPEN_SEARCH_POST_AGG_RULES) { + planner.addRule(rule); + } + // remove this rule otherwise opensearch can't correctly interpret approx_count_distinct() // it is converted to cardinality aggregation in OpenSearch planner.removeRule(CoreRules.AGGREGATE_EXPAND_DISTINCT_AGGREGATES); diff --git a/core/src/main/java/org/opensearch/sql/calcite/rule/SortReverseOptimizationRule.java b/core/src/main/java/org/opensearch/sql/calcite/rule/SortReverseOptimizationRule.java index c5461f8ddaa..3f8340ceb1c 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/rule/SortReverseOptimizationRule.java +++ b/core/src/main/java/org/opensearch/sql/calcite/rule/SortReverseOptimizationRule.java @@ -8,20 +8,19 @@ import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.rel.logical.LogicalSort; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.opensearch.sql.calcite.plan.LogicalSystemLimit; -/** - * Optimization rule that eliminates redundant consecutive sorts on the same field. - * Detects: LogicalSort(field, direction1) -> LogicalSort(field, direction2) - * Converts to: LogicalSort(field, direction1) (keeps outer sort) - */ +/** Combines sort then reverse into 1 sort. */ public class SortReverseOptimizationRule extends RelOptRule { + private static final Logger LOG = LogManager.getLogger(SortReverseOptimizationRule.class); public static final SortReverseOptimizationRule INSTANCE = new SortReverseOptimizationRule(); private SortReverseOptimizationRule() { - super(operand(LogicalSort.class, - operand(LogicalSort.class, any())), + super( + operand(LogicalSort.class, operand(LogicalSort.class, any())), "SortReverseOptimizationRule"); } @@ -29,37 +28,72 @@ private SortReverseOptimizationRule() { public boolean matches(RelOptRuleCall call) { LogicalSort outerSort = call.rel(0); LogicalSort innerSort = call.rel(1); - - // Don't optimize if outer sort is a LogicalSystemLimit - we want to preserve system limits + + LOG.debug("SortReverseOptimizationRule.matches() called"); + LOG.debug("Outer sort: {}", outerSort); + LOG.debug("Inner sort: {}", innerSort); + LOG.debug("Inner sort input: {}", innerSort.getInput()); + + // Don't optimize if outer sort is a LogicalSystemLimit if (call.rel(0) instanceof LogicalSystemLimit) { + LOG.debug("Skipping: outer sort is LogicalSystemLimit"); + return false; + } + + // Don't optimize if inner sort has a fetch limit (head/limit before sort) + // This preserves limit-then-sort semantics + if (innerSort.fetch != null) { + LOG.debug("Skipping: inner sort has fetch limit: {}", innerSort.fetch); return false; } - - return hasSameField(outerSort, innerSort); + + // Must be same field with opposite directions (sort | reverse pattern) + boolean matches = hasSameFieldWithOppositeDirection(outerSort, innerSort); + LOG.debug("Same field with opposite direction: {}", matches); + return matches; } @Override public void onMatch(RelOptRuleCall call) { LogicalSort outerSort = call.rel(0); LogicalSort innerSort = call.rel(1); - - LogicalSort optimizedSort = LogicalSort.create( - innerSort.getInput(), - outerSort.getCollation(), - outerSort.offset, - outerSort.fetch); - + + LOG.debug("SortReverseOptimizationRule.onMatch() applying transformation"); + LOG.debug("Transforming from: {} -> {}", outerSort, innerSort); + + LogicalSort optimizedSort = + LogicalSort.create( + innerSort.getInput(), outerSort.getCollation(), outerSort.offset, outerSort.fetch); + + LOG.debug("Transformed to: {}", optimizedSort); call.transformTo(optimizedSort); } - - private boolean hasSameField(LogicalSort outerSort, LogicalSort innerSort) { - if (outerSort.getCollation().getFieldCollations().isEmpty() + + private boolean hasSameFieldWithOppositeDirection(LogicalSort outerSort, LogicalSort innerSort) { + if (outerSort.getCollation().getFieldCollations().isEmpty() || innerSort.getCollation().getFieldCollations().isEmpty()) { + LOG.debug("No field collations found"); return false; } - - int outerField = outerSort.getCollation().getFieldCollations().get(0).getFieldIndex(); - int innerField = innerSort.getCollation().getFieldCollations().get(0).getFieldIndex(); - return outerField == innerField; + + var outerField = outerSort.getCollation().getFieldCollations().get(0); + var innerField = innerSort.getCollation().getFieldCollations().get(0); + + LOG.debug( + "Outer field: index={}, direction={}", + outerField.getFieldIndex(), + outerField.getDirection()); + LOG.debug( + "Inner field: index={}, direction={}", + innerField.getFieldIndex(), + innerField.getDirection()); + + boolean sameField = outerField.getFieldIndex() == innerField.getFieldIndex(); + boolean oppositeDirection = outerField.getDirection() != innerField.getDirection(); + + LOG.debug("Same field: {}, Opposite direction: {}", sameField, oppositeDirection); + + // Must be same field with opposite directions + return sameField && oppositeDirection; } -} \ No newline at end of file +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index 233c5cd6885..d67a34fba80 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -409,7 +409,7 @@ public static RelCollation reverseCollation(RelCollation original) { List reversedFields = new ArrayList<>(); for (RelFieldCollation field : original.getFieldCollations()) { RelFieldCollation.Direction reversedDirection = field.direction.reverse(); - + // Handle null direction properly - reverse it as well RelFieldCollation.NullDirection reversedNullDirection = field.nullDirection; if (reversedNullDirection == RelFieldCollation.NullDirection.FIRST) { diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 987ff3b9a38..627bbad7e7f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -232,7 +232,6 @@ public void testExplainWithReversePushdownMultipleFields() throws IOException { assertJsonEqualsIgnoreId(expected, result); } -<<<<<<< HEAD @Test public void testExplainWithTimechartAvg() throws IOException { var result = explainQueryToString("source=events | timechart span=1m avg(cpu_usage) by host"); @@ -253,44 +252,6 @@ public void testExplainWithTimechartCount() throws IOException { assertJsonEqualsIgnoreId(expected, result); } - @Test - public void testDoubleReverseWithSortNoOp() throws IOException { - String query = - "source=opensearch-sql_test_index_account | sort - age, + firstname | reverse | reverse"; - var result = explainQueryToString(query); - String expected = loadFromFile("expectedOutput/calcite/explain_double_reverse_sort_no_op.json"); - assertJsonEqualsIgnoreId(expected, result); - } -======= -// @Test -// public void testDoubleReverseNoOp() throws IOException { -// String query = -// "source=opensearch-sql_test_index_account | fields account_number | reverse | reverse"; -// var result = explainQueryToString(query); -// String expected = loadFromFile("expectedOutput/calcite/explain_double_reverse_no_op.json"); -// assertJsonEqualsIgnoreId(expected, result); -// } - -// @Test -// public void testTripleReverseOneOp() throws IOException { -// String query = -// "source=opensearch-sql_test_index_account | fields account_number | reverse | reverse |" -// + " reverse"; -// var result = explainQueryToString(query); -// String expected = loadFromFile("expectedOutput/calcite/explain_triple_reverse_one_op.json"); -// assertJsonEqualsIgnoreId(expected, result); -// } -// -// @Test -// public void testDoubleReverseWithSortNoOp() throws IOException { -// String query = -// "source=opensearch-sql_test_index_account | sort - age, + firstname | reverse | reverse"; -// var result = explainQueryToString(query); -// String expected = loadFromFile("expectedOutput/calcite/explain_double_reverse_sort_no_op.json"); -// assertJsonEqualsIgnoreId(expected, result); -// } ->>>>>>> f6a6803f3 (add opt rule for sort flip) - @Test public void noPushDownForAggOnWindow() throws IOException { Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled()); diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_sum_enhancement.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_sum_enhancement.json index 45d5b4a0544..d4a51309a40 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_sum_enhancement.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_sum_enhancement.json @@ -3,4 +3,4 @@ "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(sum(balance)=[$1], sum(balance + 100)=[$2], sum(balance - 100)=[$3], sum(balance * 100)=[$4], sum(balance / 100)=[$5], gender=[$0])\n LogicalAggregate(group=[{0}], sum(balance)=[SUM($1)], sum(balance + 100)=[SUM($2)], sum(balance - 100)=[SUM($3)], sum(balance * 100)=[SUM($4)], sum(balance / 100)=[SUM($5)])\n LogicalProject(gender=[$4], balance=[$7], $f6=[+($7, 100)], $f7=[-($7, 100)], $f8=[*($7, 100)], $f9=[DIVIDE($7, 100)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..3=[{inputs}], expr#4=[100], expr#5=[*($t2, $t4)], expr#6=[+($t1, $t5)], expr#7=[-($t1, $t5)], expr#8=[*($t1, $t4)], sum(balance)=[$t1], sum(balance + 100)=[$t6], sum(balance - 100)=[$t7], sum(balance * 100)=[$t8], sum(balance / 100)=[$t3], gender=[$t0])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},sum(balance)=SUM($1),sum(balance + 100)_COUNT=COUNT($1),sum(balance / 100)=SUM($2))], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"gender\":{\"terms\":{\"field\":\"gender.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"sum(balance)\":{\"sum\":{\"field\":\"balance\"}},\"sum(balance + 100)_COUNT\":{\"value_count\":{\"field\":\"balance\"}},\"sum(balance / 100)\":{\"sum\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQHjnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJiaXJ0aGRhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJnZW5kZXIiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJjaXR5IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAibGFzdG5hbWUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZW1wbG95ZXIiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJzdGF0ZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhZ2UiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJlbWFpbCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIkJPT0xFQU4iLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJtYWxlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2lkIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2luZGV4IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiUkVBTCIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogIl9zY29yZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfbWF4c2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc29ydCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9yb3V0aW5nIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogdHJ1ZQp9dAAEZXhwcnQBz3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJESVZJREUiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogNywKICAgICAgIm5hbWUiOiAiJDciCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDEwMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0sCiAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICJ0eXBlIjogewogICAgInR5cGUiOiAiQklHSU5UIiwKICAgICJudWxsYWJsZSI6IHRydWUKICB9LAogICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAiZHluYW1pYyI6IGZhbHNlCn10AApmaWVsZFR5cGVzc3IAF2phdmEudXRpbC5MaW5rZWRIYXNoTWFwNMBOXBBswPsCAAFaAAthY2Nlc3NPcmRlcnhyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAABh3CAAAACAAAAANdAAOYWNjb3VudF9udW1iZXJ+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AARMT05HdAAJZmlyc3RuYW1lfnEAfgALdAAGU1RSSU5HdAAHYWRkcmVzc3NyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgAUeHB+cQB+AAt0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4ADHQABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+AB94cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+ACEAAAAAc3EAfgAAAAAAA3cEAAAAAHh0AAliaXJ0aGRhdGVzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRlVHlwZZ4tUq4QfcqvAgABTAAHZm9ybWF0c3QAEExqYXZhL3V0aWwvTGlzdDt4cQB+ABV+cQB+AAt0AAlUSU1FU1RBTVB+cQB+ABt0AAREYXRlcQB+ACBzcQB+AAAAAAABdwQAAAAAeHQABmdlbmRlcnNxAH4AE3EAfgAZcQB+ABxxAH4AIHNxAH4AAAAAAAN3BAAAAAJ0AAdrZXl3b3Jkc3EAfgAVcQB+ABB+cQB+ABt0AAdLZXl3b3JkcQB+ACB4dAAEY2l0eXEAfgAQdAAIbGFzdG5hbWVxAH4AEHQAB2JhbGFuY2VxAH4ADXQACGVtcGxveWVyc3EAfgATcQB+ABlxAH4AHHEAfgAgcQB+ACR0AAVzdGF0ZXNxAH4AE3EAfgAZcQB+ABxxAH4AIHNxAH4AAAAAAAN3BAAAAAJxAH4AMXEAfgAyeHQAA2FnZX5xAH4AC3QAB0lOVEVHRVJ0AAVlbWFpbHNxAH4AE3EAfgAZcQB+ABxxAH4AIHEAfgAkdAAEbWFsZX5xAH4AC3QAB0JPT0xFQU54AHg=\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } -} +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_then_sort_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_then_sort_push.json index ebe7930c2d3..e51a4246cee 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_then_sort_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_then_sort_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age=[$8])\n LogicalSort(sort0=[$8], dir0=[ASC-nulls-first])\n LogicalSort(fetch=[5])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableSort(sort0=[$0], dir0=[ASC-nulls-first])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[LIMIT->5, PROJECT->[age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"],\"excludes\":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$0], dir0=[ASC-nulls-first])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[LIMIT->5, PROJECT->[age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"],\"excludes\":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)])\n" } -} +} \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java index b3d0a587ef5..abe3344ecb8 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java @@ -73,7 +73,7 @@ public void register(RelOptPlanner planner) { for (RelOptRule rule : OpenSearchRules.OPEN_SEARCH_OPT_RULES) { planner.addRule(rule); } - + for (RelOptRule rule : OpenSearchRules.OPTIMIZATION_RULES) { planner.addRule(rule); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java index 1c8482c756c..b47db6f3677 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java @@ -19,8 +19,7 @@ public void testReverseParserSuccess() { String ppl = "source=EMP | reverse"; RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalSort(sort0=[$0], dir0=[DESC])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(sort0=[$0], dir0=[DESC])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedResult = @@ -54,7 +53,8 @@ public void testReverseParserSuccess() { + " COMM=null; DEPTNO=20\n"; verifyResult(root, expectedResult); - String expectedSparkSql = "SELECT *\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `EMPNO` DESC NULLS FIRST"; + String expectedSparkSql = + "SELECT *\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `EMPNO` DESC NULLS FIRST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -82,22 +82,19 @@ public void testReverseWithSortParserSuccess() { public void testDoubleReverseParserSuccess() { String ppl = "source=EMP | reverse | reverse"; RelNode root = getRelNode(ppl); - // TODO: fix logical plan to be no-op with opt rule + // Without optimization rule, shows consecutive sorts String expectedLogical = - "LogicalSort(sort0=[$8], dir0=[DESC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" - + " LogicalSort(sort0=[$8], dir0=[DESC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(sort0=[$0], dir0=[ASC])\n" + + " LogicalSort(sort0=[$0], dir0=[DESC])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, ROW_NUMBER() OVER () AS `__reverse_row_num__`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, ROW_NUMBER() OVER () AS `__reverse_row_num__`\n" - + "FROM `scott`.`EMP`) `t`\n" - + "ORDER BY `__reverse_row_num__` DESC) `t0`\n" - + "ORDER BY `__reverse_row_num__` DESC"; + "SELECT *\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `EMPNO` DESC NULLS FIRST) `t`\n" + + "ORDER BY `EMPNO` NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); } From d4d70c5efa3aeefdd388c02f006d6477845a60bb Mon Sep 17 00:00:00 2001 From: Selina Song Date: Mon, 8 Sep 2025 18:04:54 -0700 Subject: [PATCH 09/16] add no pushdown IT Signed-off-by: Selina Song --- .../opensearch/sql/calcite/remote/CalciteExplainIT.java | 8 ++++++-- .../explain_reverse_pushdown_multiple.json | 6 ++++++ .../explain_reverse_pushdown_single.json | 6 ++++++ 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 627bbad7e7f..cae2cc6a95c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -220,7 +220,9 @@ public void testExplainWithReverse() throws IOException { public void testExplainWithReversePushdown() throws IOException { String query = "source=opensearch-sql_test_index_account | sort - age | reverse"; var result = explainQueryToString(query); - String expected = loadFromFile("expectedOutput/calcite/explain_reverse_pushdown_single.json"); + String expected = isPushdownEnabled() + ? loadFromFile("expectedOutput/calcite/explain_reverse_pushdown_single.json") + : loadFromFile("expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json"); assertJsonEqualsIgnoreId(expected, result); } @@ -228,7 +230,9 @@ public void testExplainWithReversePushdown() throws IOException { public void testExplainWithReversePushdownMultipleFields() throws IOException { String query = "source=opensearch-sql_test_index_account | sort - age, + firstname | reverse"; var result = explainQueryToString(query); - String expected = loadFromFile("expectedOutput/calcite/explain_reverse_pushdown_multiple.json"); + String expected = isPushdownEnabled() + ? loadFromFile("expectedOutput/calcite/explain_reverse_pushdown_multiple.json") + : loadFromFile("expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json"); assertJsonEqualsIgnoreId(expected, result); } diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json new file mode 100644 index 00000000000..10915d929e6 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json new file mode 100644 index 00000000000..03135221480 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(sort0=[$8], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], dir0=[ASC-nulls-first])\n LogicalSort(sort0=[$8], dir0=[DESC-nulls-last])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$8], dir0=[ASC-nulls-first])\n EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + } +} \ No newline at end of file From dff4522d55b8a3ab4410eca25bc47874ba4b15bb Mon Sep 17 00:00:00 2001 From: Selina Song Date: Tue, 9 Sep 2025 10:04:45 -0700 Subject: [PATCH 10/16] spotless format Signed-off-by: Selina Song --- .../sql/calcite/remote/CalciteExplainIT.java | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index cae2cc6a95c..253053b5e88 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -220,9 +220,11 @@ public void testExplainWithReverse() throws IOException { public void testExplainWithReversePushdown() throws IOException { String query = "source=opensearch-sql_test_index_account | sort - age | reverse"; var result = explainQueryToString(query); - String expected = isPushdownEnabled() - ? loadFromFile("expectedOutput/calcite/explain_reverse_pushdown_single.json") - : loadFromFile("expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json"); + String expected = + isPushdownEnabled() + ? loadFromFile("expectedOutput/calcite/explain_reverse_pushdown_single.json") + : loadFromFile( + "expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json"); assertJsonEqualsIgnoreId(expected, result); } @@ -230,9 +232,11 @@ public void testExplainWithReversePushdown() throws IOException { public void testExplainWithReversePushdownMultipleFields() throws IOException { String query = "source=opensearch-sql_test_index_account | sort - age, + firstname | reverse"; var result = explainQueryToString(query); - String expected = isPushdownEnabled() - ? loadFromFile("expectedOutput/calcite/explain_reverse_pushdown_multiple.json") - : loadFromFile("expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json"); + String expected = + isPushdownEnabled() + ? loadFromFile("expectedOutput/calcite/explain_reverse_pushdown_multiple.json") + : loadFromFile( + "expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json"); assertJsonEqualsIgnoreId(expected, result); } From 2501ed465760da8fd423d2e2a8fbd37e60e57b48 Mon Sep 17 00:00:00 2001 From: Selina Song Date: Tue, 9 Sep 2025 11:01:37 -0700 Subject: [PATCH 11/16] Address code review: extend hasSameField to compare all fields and simplify null direction handling Signed-off-by: Selina Song --- .../rule/SortReverseOptimizationRule.java | 56 ++++++++++++------- .../sql/calcite/utils/PlanUtils.java | 13 ++--- 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/rule/SortReverseOptimizationRule.java b/core/src/main/java/org/opensearch/sql/calcite/rule/SortReverseOptimizationRule.java index 3f8340ceb1c..c804f019ed1 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/rule/SortReverseOptimizationRule.java +++ b/core/src/main/java/org/opensearch/sql/calcite/rule/SortReverseOptimizationRule.java @@ -70,30 +70,48 @@ public void onMatch(RelOptRuleCall call) { } private boolean hasSameFieldWithOppositeDirection(LogicalSort outerSort, LogicalSort innerSort) { - if (outerSort.getCollation().getFieldCollations().isEmpty() - || innerSort.getCollation().getFieldCollations().isEmpty()) { + var outerFields = outerSort.getCollation().getFieldCollations(); + var innerFields = innerSort.getCollation().getFieldCollations(); + + if (outerFields.isEmpty() || innerFields.isEmpty()) { LOG.debug("No field collations found"); return false; } - var outerField = outerSort.getCollation().getFieldCollations().get(0); - var innerField = innerSort.getCollation().getFieldCollations().get(0); - - LOG.debug( - "Outer field: index={}, direction={}", - outerField.getFieldIndex(), - outerField.getDirection()); - LOG.debug( - "Inner field: index={}, direction={}", - innerField.getFieldIndex(), - innerField.getDirection()); - - boolean sameField = outerField.getFieldIndex() == innerField.getFieldIndex(); - boolean oppositeDirection = outerField.getDirection() != innerField.getDirection(); + // Must have same number of fields + if (outerFields.size() != innerFields.size()) { + LOG.debug( + "Different number of sort fields: outer={}, inner={}", + outerFields.size(), + innerFields.size()); + return false; + } - LOG.debug("Same field: {}, Opposite direction: {}", sameField, oppositeDirection); + // Check all fields have same index but opposite directions + for (int i = 0; i < outerFields.size(); i++) { + var outerField = outerFields.get(i); + var innerField = innerFields.get(i); + + LOG.debug( + "Field {}: outer(index={}, direction={}), inner(index={}, direction={})", + i, + outerField.getFieldIndex(), + outerField.getDirection(), + innerField.getFieldIndex(), + innerField.getDirection()); + + if (outerField.getFieldIndex() != innerField.getFieldIndex() + || outerField.getDirection() == innerField.getDirection()) { + LOG.debug( + "Field {} mismatch: same index={}, opposite direction={}", + i, + outerField.getFieldIndex() == innerField.getFieldIndex(), + outerField.getDirection() != innerField.getDirection()); + return false; + } + } - // Must be same field with opposite directions - return sameField && oppositeDirection; + LOG.debug("All fields match with opposite directions"); + return true; } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index d67a34fba80..1841f66d91c 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -411,13 +411,12 @@ public static RelCollation reverseCollation(RelCollation original) { RelFieldCollation.Direction reversedDirection = field.direction.reverse(); // Handle null direction properly - reverse it as well - RelFieldCollation.NullDirection reversedNullDirection = field.nullDirection; - if (reversedNullDirection == RelFieldCollation.NullDirection.FIRST) { - reversedNullDirection = RelFieldCollation.NullDirection.LAST; - } else if (reversedNullDirection == RelFieldCollation.NullDirection.LAST) { - reversedNullDirection = RelFieldCollation.NullDirection.FIRST; - } - // UNSPECIFIED remains UNSPECIFIED + RelFieldCollation.NullDirection reversedNullDirection = + field.nullDirection == RelFieldCollation.NullDirection.FIRST + ? RelFieldCollation.NullDirection.LAST + : field.nullDirection == RelFieldCollation.NullDirection.LAST + ? RelFieldCollation.NullDirection.FIRST + : field.nullDirection; RelFieldCollation reversedField = new RelFieldCollation(field.getFieldIndex(), reversedDirection, reversedNullDirection); From 980482d6c567944077fb0dad89d56f49585aae96 Mon Sep 17 00:00:00 2001 From: Selina Song Date: Tue, 9 Sep 2025 11:42:48 -0700 Subject: [PATCH 12/16] add UT sparkSQL Signed-off-by: Selina Song --- .../ppl/calcite/CalcitePPLReverseTest.java | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java index b47db6f3677..71294db978b 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java @@ -142,4 +142,87 @@ public void testReverseWithExpressionShouldFail() { String ppl = "source=EMP | reverse EMPNO + 1"; getRelNode(ppl); } + + @Test + public void testMultipleSortsWithReverseParserSuccess() { + String ppl = "source=EMP | sort + SAL | sort - ENAME | reverse"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" + + " LogicalSort(sort0=[$1], dir0=[DESC-nulls-last])\n" + + " LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `SAL`) `t`\n" + + "ORDER BY `ENAME` DESC) `t0`\n" + + "ORDER BY `ENAME`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMultiFieldSortWithReverseParserSuccess() { + String ppl = "source=EMP | sort + SAL, - ENAME | reverse"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(sort0=[$5], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n" + + " LogicalSort(sort0=[$5], sort1=[$1], dir0=[ASC-nulls-first]," + + " dir1=[DESC-nulls-last])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `SAL`, `ENAME` DESC) `t`\n" + + "ORDER BY `SAL` DESC, `ENAME`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testComplexMultiFieldSortWithReverseParserSuccess() { + String ppl = "source=EMP | sort DEPTNO, + SAL, - ENAME | reverse"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(sort0=[$7], sort1=[$5], sort2=[$1], dir0=[DESC-nulls-last]," + + " dir1=[DESC-nulls-last], dir2=[ASC-nulls-first])\n" + + " LogicalSort(sort0=[$7], sort1=[$5], sort2=[$1], dir0=[ASC-nulls-first]," + + " dir1=[ASC-nulls-first], dir2=[DESC-nulls-last])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `DEPTNO`, `SAL`, `ENAME` DESC) `t`\n" + + "ORDER BY `DEPTNO` DESC, `SAL` DESC, `ENAME`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReverseWithFieldsAndSortParserSuccess() { + String ppl = "source=EMP | fields ENAME, SAL, DEPTNO | sort + SAL | reverse"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(sort0=[$1], dir0=[DESC-nulls-last])\n" + + " LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" + + " LogicalProject(ENAME=[$1], SAL=[$5], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + + "FROM (SELECT `ENAME`, `SAL`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `SAL`) `t0`\n" + + "ORDER BY `SAL` DESC"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } From 409bfa9d89bbf03ab34a4d2e229e52ae336832d0 Mon Sep 17 00:00:00 2001 From: Selina Song Date: Wed, 10 Sep 2025 16:29:01 -0700 Subject: [PATCH 13/16] rename opt rule, cleanup explain UT Signed-off-by: Selina Song --- .../sql/calcite/plan/OpenSearchRules.java | 6 +- .../sql/calcite/plan/OpenSearchTableScan.java | 5 -- ...ionRule.java => SortDirectionOptRule.java} | 55 +++++++++++-------- .../sql/calcite/remote/CalciteExplainIT.java | 26 +++------ .../explain_reverse_pushdown_multiple.json | 2 +- .../explain_reverse_pushdown_single.json | 2 +- .../scan/CalciteEnumerableIndexScan.java | 2 +- .../ppl/calcite/CalcitePPLReverseTest.java | 27 +++++++++ 8 files changed, 73 insertions(+), 52 deletions(-) rename core/src/main/java/org/opensearch/sql/calcite/rule/{SortReverseOptimizationRule.java => SortDirectionOptRule.java} (62%) diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRules.java b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRules.java index a563f6e9fa4..5578175366d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRules.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRules.java @@ -8,7 +8,7 @@ import com.google.common.collect.ImmutableList; import java.util.List; import org.apache.calcite.plan.RelOptRule; -import org.opensearch.sql.calcite.rule.SortReverseOptimizationRule; +import org.opensearch.sql.calcite.rule.SortDirectionOptRule; public class OpenSearchRules { private static final PPLAggregateConvertRule AGGREGATE_CONVERT_RULE = @@ -17,10 +17,8 @@ public class OpenSearchRules { public static final List OPEN_SEARCH_OPT_RULES = ImmutableList.of(AGGREGATE_CONVERT_RULE); - public static final List OPTIMIZATION_RULES = ImmutableList.of(); - public static final List OPEN_SEARCH_POST_AGG_RULES = - ImmutableList.of(SortReverseOptimizationRule.INSTANCE); + ImmutableList.of(SortDirectionOptRule.INSTANCE); // prevent instantiation private OpenSearchRules() {} diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchTableScan.java b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchTableScan.java index da4804747ea..7782f839d75 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchTableScan.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchTableScan.java @@ -33,11 +33,6 @@ public void register(RelOptPlanner planner) { planner.addRule(rule); } - // Register optimization rules - for (RelOptRule rule : OpenSearchRules.OPTIMIZATION_RULES) { - planner.addRule(rule); - } - // Register post-aggregation rules (run after aggregate optimizations) for (RelOptRule rule : OpenSearchRules.OPEN_SEARCH_POST_AGG_RULES) { planner.addRule(rule); diff --git a/core/src/main/java/org/opensearch/sql/calcite/rule/SortReverseOptimizationRule.java b/core/src/main/java/org/opensearch/sql/calcite/rule/SortDirectionOptRule.java similarity index 62% rename from core/src/main/java/org/opensearch/sql/calcite/rule/SortReverseOptimizationRule.java rename to core/src/main/java/org/opensearch/sql/calcite/rule/SortDirectionOptRule.java index c804f019ed1..e6b8655c426 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/rule/SortReverseOptimizationRule.java +++ b/core/src/main/java/org/opensearch/sql/calcite/rule/SortDirectionOptRule.java @@ -12,36 +12,40 @@ import org.apache.logging.log4j.Logger; import org.opensearch.sql.calcite.plan.LogicalSystemLimit; -/** Combines sort then reverse into 1 sort. */ -public class SortReverseOptimizationRule extends RelOptRule { - private static final Logger LOG = LogManager.getLogger(SortReverseOptimizationRule.class); +/** Combines consecutive sorts with opposite directions into 1 sort. */ +public class SortDirectionOptRule extends RelOptRule { + private static final Logger LOG = LogManager.getLogger(SortDirectionOptRule.class); - public static final SortReverseOptimizationRule INSTANCE = new SortReverseOptimizationRule(); + public static final SortDirectionOptRule INSTANCE = new SortDirectionOptRule(); - private SortReverseOptimizationRule() { + private SortDirectionOptRule() { super( - operand(LogicalSort.class, operand(LogicalSort.class, any())), - "SortReverseOptimizationRule"); + operand(LogicalSort.class, + operand(org.apache.calcite.rel.RelNode.class, + operand(LogicalSort.class, any()))), + "SortDirectionOptRule"); } @Override public boolean matches(RelOptRuleCall call) { LogicalSort outerSort = call.rel(0); - LogicalSort innerSort = call.rel(1); - - LOG.debug("SortReverseOptimizationRule.matches() called"); - LOG.debug("Outer sort: {}", outerSort); - LOG.debug("Inner sort: {}", innerSort); - LOG.debug("Inner sort input: {}", innerSort.getInput()); - - // Don't optimize if outer sort is a LogicalSystemLimit - if (call.rel(0) instanceof LogicalSystemLimit) { - LOG.debug("Skipping: outer sort is LogicalSystemLimit"); + org.apache.calcite.rel.RelNode intermediate = call.rel(1); + LogicalSort innerSort = call.rel(2); + + LOG.debug("SortDirectionOptRule.matches() - outer: {}, intermediate: {}, inner: {}", + outerSort, intermediate, innerSort); + + // Only allow single-input intermediate nodes (like LogicalProject) + if (intermediate.getInputs().size() != 1) { + LOG.debug("Intermediate node has {} inputs, expected 1", intermediate.getInputs().size()); return false; } // Don't optimize if inner sort has a fetch limit (head/limit before sort) // This preserves limit-then-sort semantics + // Example: source=t | head 5 | sort field | reverse + // Plan: Sort(reverse) -> Sort(field, fetch=5) -> Scan + // Should NOT be optimized to preserve the "take first 5, then sort" behavior if (innerSort.fetch != null) { LOG.debug("Skipping: inner sort has fetch limit: {}", innerSort.fetch); return false; @@ -56,17 +60,23 @@ public boolean matches(RelOptRuleCall call) { @Override public void onMatch(RelOptRuleCall call) { LogicalSort outerSort = call.rel(0); - LogicalSort innerSort = call.rel(1); + org.apache.calcite.rel.RelNode intermediate = call.rel(1); + LogicalSort innerSort = call.rel(2); - LOG.debug("SortReverseOptimizationRule.onMatch() applying transformation"); - LOG.debug("Transforming from: {} -> {}", outerSort, innerSort); + LOG.debug("SortDirectionOptRule.onMatch() transforming: {} -> {}", outerSort, innerSort); + // Create optimized sort with the final direction LogicalSort optimizedSort = LogicalSort.create( innerSort.getInput(), outerSort.getCollation(), outerSort.offset, outerSort.fetch); - LOG.debug("Transformed to: {}", optimizedSort); - call.transformTo(optimizedSort); + // Recreate the intermediate node with the optimized sort as input + org.apache.calcite.rel.RelNode newIntermediate = + intermediate.copy(intermediate.getTraitSet(), + java.util.Collections.singletonList(optimizedSort)); + + LOG.debug("Transformed to: {}", newIntermediate); + call.transformTo(newIntermediate); } private boolean hasSameFieldWithOppositeDirection(LogicalSort outerSort, LogicalSort innerSort) { @@ -114,4 +124,5 @@ private boolean hasSameFieldWithOppositeDirection(LogicalSort outerSort, Logical LOG.debug("All fields match with opposite directions"); return true; } + } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 253053b5e88..9c4ce999a89 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -220,11 +220,7 @@ public void testExplainWithReverse() throws IOException { public void testExplainWithReversePushdown() throws IOException { String query = "source=opensearch-sql_test_index_account | sort - age | reverse"; var result = explainQueryToString(query); - String expected = - isPushdownEnabled() - ? loadFromFile("expectedOutput/calcite/explain_reverse_pushdown_single.json") - : loadFromFile( - "expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json"); + String expected = loadExpectedPlan("explain_reverse_pushdown_single.json"); assertJsonEqualsIgnoreId(expected, result); } @@ -232,31 +228,25 @@ public void testExplainWithReversePushdown() throws IOException { public void testExplainWithReversePushdownMultipleFields() throws IOException { String query = "source=opensearch-sql_test_index_account | sort - age, + firstname | reverse"; var result = explainQueryToString(query); - String expected = - isPushdownEnabled() - ? loadFromFile("expectedOutput/calcite/explain_reverse_pushdown_multiple.json") - : loadFromFile( - "expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json"); + String expected = loadExpectedPlan("explain_reverse_pushdown_multiple.json"); assertJsonEqualsIgnoreId(expected, result); } @Test public void testExplainWithTimechartAvg() throws IOException { var result = explainQueryToString("source=events | timechart span=1m avg(cpu_usage) by host"); - String expected = - isPushdownEnabled() - ? loadFromFile("expectedOutput/calcite/explain_timechart.json") - : loadFromFile("expectedOutput/calcite/explain_timechart_no_pushdown.json"); + String expected = isPushdownEnabled() + ? loadFromFile("expectedOutput/calcite/explain_timechart.json") + : loadFromFile("expectedOutput/calcite/explain_timechart_no_pushdown.json"); assertJsonEqualsIgnoreId(expected, result); } @Test public void testExplainWithTimechartCount() throws IOException { var result = explainQueryToString("source=events | timechart span=1m count() by host"); - String expected = - isPushdownEnabled() - ? loadFromFile("expectedOutput/calcite/explain_timechart_count.json") - : loadFromFile("expectedOutput/calcite/explain_timechart_count_no_pushdown.json"); + String expected = isPushdownEnabled() + ? loadFromFile("expectedOutput/calcite/explain_timechart_count.json") + : loadFromFile("expectedOutput/calcite/explain_timechart_count_no_pushdown.json"); assertJsonEqualsIgnoreId(expected, result); } diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json index 10915d929e6..975463cc30d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}])\n EnumerableSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json index 03135221480..113daa4f585 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(sort0=[$8], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], dir0=[ASC-nulls-first])\n LogicalSort(sort0=[$8], dir0=[DESC-nulls-last])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$8], dir0=[ASC-nulls-first])\n EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}])\n EnumerableSort(sort0=[$8], dir0=[ASC-nulls-first])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" } } \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java index abe3344ecb8..8bbaa7b8086 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java @@ -74,7 +74,7 @@ public void register(RelOptPlanner planner) { planner.addRule(rule); } - for (RelOptRule rule : OpenSearchRules.OPTIMIZATION_RULES) { + for (RelOptRule rule : OpenSearchRules.OPEN_SEARCH_POST_AGG_RULES) { planner.addRule(rule); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java index 71294db978b..09594422191 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java @@ -225,4 +225,31 @@ public void testReverseWithFieldsAndSortParserSuccess() { + "ORDER BY `SAL` DESC"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testHeadThenSortReverseNoOpt() { + // Tests fetch limit behavior: head 5 | sort field | reverse + // Should NOT be optimized to preserve "take first 5, then sort" semantics + String ppl = "source=EMP | head 5 | sort + SAL | reverse"; + RelNode root = getRelNode(ppl); + + // Should have three LogicalSort nodes: fetch=5, sort SAL, reverse + // This demonstrates that SortDirectionOptRule skips optimization when inner sort has fetch + String expectedLogical = + "LogicalSort(sort0=[$5], dir0=[DESC-nulls-last])\n" + + " LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" + + " LogicalSort(fetch=[5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 5) `t`\n" + + "ORDER BY `SAL`) `t0`\n" + + "ORDER BY `SAL` DESC"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } From e8c54786d21d1dc16359618128742b22df4cfa0a Mon Sep 17 00:00:00 2001 From: Selina Song Date: Thu, 11 Sep 2025 11:42:59 -0700 Subject: [PATCH 14/16] remove Opt Rule, keep collation Signed-off-by: Selina Song --- .../sql/calcite/plan/OpenSearchRules.java | 4 - .../sql/calcite/plan/OpenSearchTableScan.java | 5 - .../calcite/rule/SortDirectionOptRule.java | 128 ------------------ .../sql/calcite/remote/CalciteExplainIT.java | 14 +- .../explain_reverse_pushdown_multiple.json | 2 +- .../explain_reverse_pushdown_single.json | 2 +- .../scan/CalciteEnumerableIndexScan.java | 4 - .../ppl/calcite/CalcitePPLReverseTest.java | 4 +- 8 files changed, 12 insertions(+), 151 deletions(-) delete mode 100644 core/src/main/java/org/opensearch/sql/calcite/rule/SortDirectionOptRule.java diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRules.java b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRules.java index 5578175366d..3f64e23493d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRules.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRules.java @@ -8,7 +8,6 @@ import com.google.common.collect.ImmutableList; import java.util.List; import org.apache.calcite.plan.RelOptRule; -import org.opensearch.sql.calcite.rule.SortDirectionOptRule; public class OpenSearchRules { private static final PPLAggregateConvertRule AGGREGATE_CONVERT_RULE = @@ -17,9 +16,6 @@ public class OpenSearchRules { public static final List OPEN_SEARCH_OPT_RULES = ImmutableList.of(AGGREGATE_CONVERT_RULE); - public static final List OPEN_SEARCH_POST_AGG_RULES = - ImmutableList.of(SortDirectionOptRule.INSTANCE); - // prevent instantiation private OpenSearchRules() {} } diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchTableScan.java b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchTableScan.java index 7782f839d75..03678713ef4 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchTableScan.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchTableScan.java @@ -33,11 +33,6 @@ public void register(RelOptPlanner planner) { planner.addRule(rule); } - // Register post-aggregation rules (run after aggregate optimizations) - for (RelOptRule rule : OpenSearchRules.OPEN_SEARCH_POST_AGG_RULES) { - planner.addRule(rule); - } - // remove this rule otherwise opensearch can't correctly interpret approx_count_distinct() // it is converted to cardinality aggregation in OpenSearch planner.removeRule(CoreRules.AGGREGATE_EXPAND_DISTINCT_AGGREGATES); diff --git a/core/src/main/java/org/opensearch/sql/calcite/rule/SortDirectionOptRule.java b/core/src/main/java/org/opensearch/sql/calcite/rule/SortDirectionOptRule.java deleted file mode 100644 index e6b8655c426..00000000000 --- a/core/src/main/java/org/opensearch/sql/calcite/rule/SortDirectionOptRule.java +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.calcite.rule; - -import org.apache.calcite.plan.RelOptRule; -import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.rel.logical.LogicalSort; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.opensearch.sql.calcite.plan.LogicalSystemLimit; - -/** Combines consecutive sorts with opposite directions into 1 sort. */ -public class SortDirectionOptRule extends RelOptRule { - private static final Logger LOG = LogManager.getLogger(SortDirectionOptRule.class); - - public static final SortDirectionOptRule INSTANCE = new SortDirectionOptRule(); - - private SortDirectionOptRule() { - super( - operand(LogicalSort.class, - operand(org.apache.calcite.rel.RelNode.class, - operand(LogicalSort.class, any()))), - "SortDirectionOptRule"); - } - - @Override - public boolean matches(RelOptRuleCall call) { - LogicalSort outerSort = call.rel(0); - org.apache.calcite.rel.RelNode intermediate = call.rel(1); - LogicalSort innerSort = call.rel(2); - - LOG.debug("SortDirectionOptRule.matches() - outer: {}, intermediate: {}, inner: {}", - outerSort, intermediate, innerSort); - - // Only allow single-input intermediate nodes (like LogicalProject) - if (intermediate.getInputs().size() != 1) { - LOG.debug("Intermediate node has {} inputs, expected 1", intermediate.getInputs().size()); - return false; - } - - // Don't optimize if inner sort has a fetch limit (head/limit before sort) - // This preserves limit-then-sort semantics - // Example: source=t | head 5 | sort field | reverse - // Plan: Sort(reverse) -> Sort(field, fetch=5) -> Scan - // Should NOT be optimized to preserve the "take first 5, then sort" behavior - if (innerSort.fetch != null) { - LOG.debug("Skipping: inner sort has fetch limit: {}", innerSort.fetch); - return false; - } - - // Must be same field with opposite directions (sort | reverse pattern) - boolean matches = hasSameFieldWithOppositeDirection(outerSort, innerSort); - LOG.debug("Same field with opposite direction: {}", matches); - return matches; - } - - @Override - public void onMatch(RelOptRuleCall call) { - LogicalSort outerSort = call.rel(0); - org.apache.calcite.rel.RelNode intermediate = call.rel(1); - LogicalSort innerSort = call.rel(2); - - LOG.debug("SortDirectionOptRule.onMatch() transforming: {} -> {}", outerSort, innerSort); - - // Create optimized sort with the final direction - LogicalSort optimizedSort = - LogicalSort.create( - innerSort.getInput(), outerSort.getCollation(), outerSort.offset, outerSort.fetch); - - // Recreate the intermediate node with the optimized sort as input - org.apache.calcite.rel.RelNode newIntermediate = - intermediate.copy(intermediate.getTraitSet(), - java.util.Collections.singletonList(optimizedSort)); - - LOG.debug("Transformed to: {}", newIntermediate); - call.transformTo(newIntermediate); - } - - private boolean hasSameFieldWithOppositeDirection(LogicalSort outerSort, LogicalSort innerSort) { - var outerFields = outerSort.getCollation().getFieldCollations(); - var innerFields = innerSort.getCollation().getFieldCollations(); - - if (outerFields.isEmpty() || innerFields.isEmpty()) { - LOG.debug("No field collations found"); - return false; - } - - // Must have same number of fields - if (outerFields.size() != innerFields.size()) { - LOG.debug( - "Different number of sort fields: outer={}, inner={}", - outerFields.size(), - innerFields.size()); - return false; - } - - // Check all fields have same index but opposite directions - for (int i = 0; i < outerFields.size(); i++) { - var outerField = outerFields.get(i); - var innerField = innerFields.get(i); - - LOG.debug( - "Field {}: outer(index={}, direction={}), inner(index={}, direction={})", - i, - outerField.getFieldIndex(), - outerField.getDirection(), - innerField.getFieldIndex(), - innerField.getDirection()); - - if (outerField.getFieldIndex() != innerField.getFieldIndex() - || outerField.getDirection() == innerField.getDirection()) { - LOG.debug( - "Field {} mismatch: same index={}, opposite direction={}", - i, - outerField.getFieldIndex() == innerField.getFieldIndex(), - outerField.getDirection() != innerField.getDirection()); - return false; - } - } - - LOG.debug("All fields match with opposite directions"); - return true; - } - -} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 9c4ce999a89..54fde97cc07 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -235,18 +235,20 @@ public void testExplainWithReversePushdownMultipleFields() throws IOException { @Test public void testExplainWithTimechartAvg() throws IOException { var result = explainQueryToString("source=events | timechart span=1m avg(cpu_usage) by host"); - String expected = isPushdownEnabled() - ? loadFromFile("expectedOutput/calcite/explain_timechart.json") - : loadFromFile("expectedOutput/calcite/explain_timechart_no_pushdown.json"); + String expected = + isPushdownEnabled() + ? loadFromFile("expectedOutput/calcite/explain_timechart.json") + : loadFromFile("expectedOutput/calcite/explain_timechart_no_pushdown.json"); assertJsonEqualsIgnoreId(expected, result); } @Test public void testExplainWithTimechartCount() throws IOException { var result = explainQueryToString("source=events | timechart span=1m count() by host"); - String expected = isPushdownEnabled() - ? loadFromFile("expectedOutput/calcite/explain_timechart_count.json") - : loadFromFile("expectedOutput/calcite/explain_timechart_count_no_pushdown.json"); + String expected = + isPushdownEnabled() + ? loadFromFile("expectedOutput/calcite/explain_timechart_count.json") + : loadFromFile("expectedOutput/calcite/explain_timechart_count_no_pushdown.json"); assertJsonEqualsIgnoreId(expected, result); } diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json index 975463cc30d..10915d929e6 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}])\n EnumerableSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json index 113daa4f585..03135221480 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(sort0=[$8], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], dir0=[ASC-nulls-first])\n LogicalSort(sort0=[$8], dir0=[DESC-nulls-last])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}])\n EnumerableSort(sort0=[$8], dir0=[ASC-nulls-first])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$8], dir0=[ASC-nulls-first])\n EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" } } \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java index 8bbaa7b8086..33b80f61982 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java @@ -74,10 +74,6 @@ public void register(RelOptPlanner planner) { planner.addRule(rule); } - for (RelOptRule rule : OpenSearchRules.OPEN_SEARCH_POST_AGG_RULES) { - planner.addRule(rule); - } - // remove this rule otherwise opensearch can't correctly interpret approx_count_distinct() // it is converted to cardinality aggregation in OpenSearch planner.removeRule(CoreRules.AGGREGATE_EXPAND_DISTINCT_AGGREGATES); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java index 09594422191..7e732648185 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java @@ -232,9 +232,9 @@ public void testHeadThenSortReverseNoOpt() { // Should NOT be optimized to preserve "take first 5, then sort" semantics String ppl = "source=EMP | head 5 | sort + SAL | reverse"; RelNode root = getRelNode(ppl); - + // Should have three LogicalSort nodes: fetch=5, sort SAL, reverse - // This demonstrates that SortDirectionOptRule skips optimization when inner sort has fetch + // Calcite's built-in optimization will handle the physical plan optimization String expectedLogical = "LogicalSort(sort0=[$5], dir0=[DESC-nulls-last])\n" + " LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" From a16c2c542eb1943c5d9a0bac320a01a7388ec1ec Mon Sep 17 00:00:00 2001 From: Selina Song Date: Fri, 12 Sep 2025 09:49:06 -0700 Subject: [PATCH 15/16] Add full reverse fallback log plan Signed-off-by: Selina Song --- .../sql/calcite/remote/CalciteExplainIT.java | 13 ++++--------- .../calcite/explain_reverse_fallback.json | 6 ++++++ .../explain_reverse_fallback.json | 6 ++++++ 3 files changed, 16 insertions(+), 9 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.json diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 12cd86b22d5..d5a97587783 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -237,15 +237,10 @@ public void testFilterFunctionScriptPushDownExplain() throws Exception { @Test public void testExplainWithReverse() throws IOException { - String result = - executeWithReplace("explain source=opensearch-sql_test_index_account | reverse | head 5"); - - // Verify that the plan contains a LogicalSort with fetch (from head 5) - assertTrue(result.contains("LogicalSort") && result.contains("fetch=[5]")); - - // Verify that reverse added a ROW_NUMBER and another sort (descending) - assertTrue(result.contains("ROW_NUMBER()")); - assertTrue(result.contains("dir0=[DESC]")); + String query = "source=opensearch-sql_test_index_account | reverse | head 5"; + var result = explainQueryToString(query); + String expected = loadExpectedPlan("explain_reverse_fallback.json"); + assertJsonEqualsIgnoreId(expected, result); } @Test diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.json new file mode 100644 index 00000000000..723d977fb9d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$17], dir0=[DESC], fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..17=[{inputs}], proj#0..10=[{exprs}])\n EnumerableLimit(fetch=[5])\n EnumerableSort(sort0=[$17], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.json new file mode 100644 index 00000000000..d36c9ee1a39 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$17], dir0=[DESC], fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..11=[{inputs}], proj#0..10=[{exprs}])\n EnumerableLimit(fetch=[5])\n EnumerableSort(sort0=[$11], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file From 63a1f801fc8dc51dc384d6209eb89c072a3c69d9 Mon Sep 17 00:00:00 2001 From: Selina Song Date: Fri, 12 Sep 2025 10:18:15 -0700 Subject: [PATCH 16/16] fix ExplainIT Signed-off-by: Selina Song --- .../expectedOutput/calcite/explain_reverse_fallback.json | 2 +- .../calcite_no_pushdown/explain_reverse_fallback.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.json index 723d977fb9d..d36c9ee1a39 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$17], dir0=[DESC], fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..17=[{inputs}], proj#0..10=[{exprs}])\n EnumerableLimit(fetch=[5])\n EnumerableSort(sort0=[$17], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..11=[{inputs}], proj#0..10=[{exprs}])\n EnumerableLimit(fetch=[5])\n EnumerableSort(sort0=[$11], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.json index d36c9ee1a39..723d977fb9d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.json +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$17], dir0=[DESC], fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..11=[{inputs}], proj#0..10=[{exprs}])\n EnumerableLimit(fetch=[5])\n EnumerableSort(sort0=[$11], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..17=[{inputs}], proj#0..10=[{exprs}])\n EnumerableLimit(fetch=[5])\n EnumerableSort(sort0=[$17], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" } } \ No newline at end of file