From f1ed013f879b6f10739b3fedaf9a0979dce2b918 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 10 Nov 2025 14:44:12 -0800 Subject: [PATCH 01/10] Implement reverse performance optimization This commit optimizes the `reverse` command in the Calcite planner by intelligently reversing existing sort collations instead of always using the ROW_NUMBER() approach. Key changes: - Added PlanUtils.reverseCollation() method to flip sort directions and null directions - Updated CalciteRelNodeVisitor.visitReverse() to: - Check for existing sort collations - Reverse them if present (more efficient) - Fall back to ROW_NUMBER() when no sort exists - Added comprehensive integration test expected outputs for: - Single field reverse pushdown - Multiple field reverse pushdown - Reverse fallback cases - Double reverse no-op optimizations This optimization significantly improves performance when reversing already-sorted data by leveraging database-native sort reversal. Based on PR #4056 by @selsong Signed-off-by: Kai Huang --- .../sql/calcite/CalciteRelNodeVisitor.java | 38 ++++++++++++------- .../sql/calcite/utils/PlanUtils.java | 34 +++++++++++++++++ .../calcite/explain_double_reverse_no_op.json | 6 +++ .../explain_double_reverse_sort_no_op.json | 6 +++ .../calcite/explain_reverse_fallback.json | 6 +++ .../explain_reverse_pushdown_multiple.json | 6 +++ .../explain_reverse_pushdown_single.json | 6 +++ .../explain_reverse_fallback.json | 6 +++ .../explain_reverse_pushdown_multiple.json | 6 +++ .../explain_reverse_pushdown_single.json | 6 +++ 10 files changed, 107 insertions(+), 13 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_no_op.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_sort_no_op.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 2b3b1fd88e7..24bab0b7445 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -45,6 +45,7 @@ import org.apache.calcite.adapter.enumerable.RexToLixTranslator; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.ViewExpanders; +import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.JoinRelType; @@ -674,19 +675,30 @@ public RelNode visitHead(Head node, CalcitePlanContext context) { public RelNode visitReverse( org.opensearch.sql.ast.tree.Reverse node, CalcitePlanContext context) { visitChildren(node, context); - // Add ROW_NUMBER() column - RexNode rowNumber = - context - .relBuilder - .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) - .over() - .rowsTo(RexWindowBounds.CURRENT_ROW) - .as(REVERSE_ROW_NUM); - context.relBuilder.projectPlus(rowNumber); - // Sort by row number descending - context.relBuilder.sort(context.relBuilder.desc(context.relBuilder.field(REVERSE_ROW_NUM))); - // Remove row number column - context.relBuilder.projectExcept(context.relBuilder.field(REVERSE_ROW_NUM)); + + // Check if there's an existing sort to reverse + List collations = + context.relBuilder.getCluster().getMetadataQuery().collations(context.relBuilder.peek()); + RelCollation collation = collations != null && !collations.isEmpty() ? collations.get(0) : null; + + if (collation != null && !collation.getFieldCollations().isEmpty()) { + // If there's an existing sort, reverse its direction + RelCollation reversedCollation = PlanUtils.reverseCollation(collation); + context.relBuilder.sort(reversedCollation); + } else { + // Fallback: use ROW_NUMBER approach when no existing sort + RexNode rowNumber = + context + .relBuilder + .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) + .over() + .rowsTo(RexWindowBounds.CURRENT_ROW) + .as(REVERSE_ROW_NUM); + context.relBuilder.projectPlus(rowNumber); + context.relBuilder.sort(context.relBuilder.desc(context.relBuilder.field(REVERSE_ROW_NUM))); + context.relBuilder.projectExcept(context.relBuilder.field(REVERSE_ROW_NUM)); + } + return context.relBuilder.peek(); } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index ded7ba541a4..e1db3529736 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -22,6 +22,9 @@ import java.util.stream.Collectors; import javax.annotation.Nullable; import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelHomogeneousShuttle; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelShuttle; @@ -568,6 +571,37 @@ public Void visitCorrelVariable(RexCorrelVariable correlVar) { } } + /** + * Reverses the direction of a RelCollation. + * + * @param original The original collation to reverse + * @return A new RelCollation with reversed directions + */ + public static RelCollation reverseCollation(RelCollation original) { + if (original == null || original.getFieldCollations().isEmpty()) { + return original; + } + + List reversedFields = new ArrayList<>(); + for (RelFieldCollation field : original.getFieldCollations()) { + RelFieldCollation.Direction reversedDirection = field.direction.reverse(); + + // Handle null direction properly - reverse it as well + RelFieldCollation.NullDirection reversedNullDirection = + field.nullDirection == RelFieldCollation.NullDirection.FIRST + ? RelFieldCollation.NullDirection.LAST + : field.nullDirection == RelFieldCollation.NullDirection.LAST + ? RelFieldCollation.NullDirection.FIRST + : field.nullDirection; + + RelFieldCollation reversedField = + new RelFieldCollation(field.getFieldIndex(), reversedDirection, reversedNullDirection); + reversedFields.add(reversedField); + } + + return RelCollations.of(reversedFields); + } + /** Adds a rel node to the top of the stack while preserving the field names and aliases. */ static void replaceTop(RelBuilder relBuilder, RelNode relNode) { try { diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_no_op.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_no_op.json new file mode 100644 index 00000000000..57ecf12a092 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_no_op.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(cpu_usage=[$0], @timestamp=[$1])\n LogicalSort(sort0=[$8], dir0=[DESC])\n LogicalProject(cpu_usage=[$0], @timestamp=[$1], _id=[$2], _index=[$3], _score=[$4], _maxscore=[$5], _sort=[$6], _routing=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n LogicalSort(sort0=[$8], dir0=[DESC])\n LogicalProject(cpu_usage=[$0], @timestamp=[$1], _id=[$2], _index=[$3], _score=[$4], _maxscore=[$5], _sort=[$6], _routing=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n CalciteLogicalIndexScan(table=[[OpenSearch, events]])\n", + "physical": "EnumerableCalc(expr#0..3=[{inputs}], proj#0..1=[{exprs}])\n EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$3], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableSort(sort0=[$2], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[cpu_usage, @timestamp]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"cpu_usage\",\"@timestamp\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_sort_no_op.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_sort_no_op.json new file mode 100644 index 00000000000..e3ad90650d7 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_sort_no_op.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{\n \"age\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_last\"\n }\n}, {\n \"firstname.keyword\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"desc\",\"missing\":\"_last\"}},{\"firstname.keyword\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + } +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.json new file mode 100644 index 00000000000..8cf7b29bd62 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$17], dir0=[DESC], fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..11=[{inputs}], proj#0..10=[{exprs}])\n EnumerableLimit(fetch=[5])\n EnumerableSort(sort0=[$11], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.json new file mode 100644 index 00000000000..f4d73f04866 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{\n \"age\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}, {\n \"firstname.keyword\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_last\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}},{\"firstname.keyword\":{\"order\":\"desc\",\"missing\":\"_last\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + } +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.json new file mode 100644 index 00000000000..491b20a4a01 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(sort0=[$8], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], dir0=[ASC-nulls-first])\n LogicalSort(sort0=[$8], dir0=[DESC-nulls-last])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{\n \"age\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + } +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.json new file mode 100644 index 00000000000..723d977fb9d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$17], dir0=[DESC], fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..17=[{inputs}], proj#0..10=[{exprs}])\n EnumerableLimit(fetch=[5])\n EnumerableSort(sort0=[$17], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json new file mode 100644 index 00000000000..10915d929e6 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json new file mode 100644 index 00000000000..03135221480 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical": "LogicalSystemLimit(sort0=[$8], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], dir0=[ASC-nulls-first])\n LogicalSort(sort0=[$8], dir0=[DESC-nulls-last])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$8], dir0=[ASC-nulls-first])\n EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + } +} \ No newline at end of file From 3dbb97746fbd6d62dd86b897e00b056503688255 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 10 Nov 2025 14:53:37 -0800 Subject: [PATCH 02/10] fixes Signed-off-by: Kai Huang --- .../sql/calcite/CalciteNoPushdownIT.java | 1 + .../sql/calcite/remote/CalciteExplainIT.java | 26 +++++++++++++------ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index 15051417db1..56b12a84c5a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -88,6 +88,7 @@ CalciteQueryAnalysisIT.class, CalciteRareCommandIT.class, CalciteRegexCommandIT.class, + CalciteReverseCommandIT.class, CalciteRexCommandIT.class, CalciteRenameCommandIT.class, CalciteReplaceCommandIT.class, diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 7a454208826..a7e94374eff 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -411,16 +411,26 @@ public void testFilterWithSearchCall() throws IOException { @Test public void testExplainWithReverse() throws IOException { - String result = - executeWithReplace( - "explain source=opensearch-sql_test_index_account | sort age | reverse | head 5"); + String query = "source=opensearch-sql_test_index_account | reverse | head 5"; + var result = explainQueryToString(query); + String expected = loadExpectedPlan("explain_reverse_fallback.json"); + assertJsonEqualsIgnoreId(expected, result); + } - // Verify that the plan contains a LogicalSort with fetch (from head 5) - assertTrue(result.contains("LogicalSort") && result.contains("fetch=[5]")); + @Test + public void testExplainWithReversePushdown() throws IOException { + String query = "source=opensearch-sql_test_index_account | sort - age | reverse"; + var result = explainQueryToString(query); + String expected = loadExpectedPlan("explain_reverse_pushdown_single.json"); + assertJsonEqualsIgnoreId(expected, result); + } - // Verify that reverse added a ROW_NUMBER and another sort (descending) - assertTrue(result.contains("ROW_NUMBER()")); - assertTrue(result.contains("dir0=[DESC]")); + @Test + public void testExplainWithReversePushdownMultipleFields() throws IOException { + String query = "source=opensearch-sql_test_index_account | sort - age, + firstname | reverse"; + var result = explainQueryToString(query); + String expected = loadExpectedPlan("explain_reverse_pushdown_multiple.json"); + assertJsonEqualsIgnoreId(expected, result); } @Test From 474e1886db58795df80c368e8804994ac63fc4f5 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 10 Nov 2025 15:03:45 -0800 Subject: [PATCH 03/10] fix UT Signed-off-by: Kai Huang --- .../ppl/calcite/CalcitePPLReverseTest.java | 188 ++++++++++++------ 1 file changed, 131 insertions(+), 57 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java index 179fb3bc830..7e732648185 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java @@ -19,13 +19,7 @@ public void testReverseParserSuccess() { String ppl = "source=EMP | reverse"; RelNode root = getRelNode(ppl); String expectedLogical = - "" - + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7])\n" - + " LogicalSort(sort0=[$8], dir0=[DESC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(sort0=[$0], dir0=[DESC])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedResult = @@ -60,12 +54,7 @@ public void testReverseParserSuccess() { verifyResult(root, expectedResult); String expectedSparkSql = - "" - + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__reverse_row_num__`\n" - + "FROM `scott`.`EMP`\n" - + "ORDER BY 9 DESC NULLS FIRST) `t0`"; + "SELECT *\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `EMPNO` DESC NULLS FIRST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -73,25 +62,19 @@ public void testReverseParserSuccess() { public void testReverseWithSortParserSuccess() { String ppl = "source=EMP | sort ENAME | reverse"; RelNode root = getRelNode(ppl); + // Optimization rule may show double sorts in logical plan but physical execution is optimized String expectedLogical = - "" - + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7])\n" - + " LogicalSort(sort0=[$8], dir0=[DESC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" - + " LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(sort0=[$1], dir0=[DESC-nulls-last])\n" + + " LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "" - + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__reverse_row_num__`\n" + "SELECT *\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + "FROM `scott`.`EMP`\n" - + "ORDER BY `ENAME`) `t0`\n" - + "ORDER BY `__reverse_row_num__` DESC NULLS FIRST"; + + "ORDER BY `ENAME`) `t`\n" + + "ORDER BY `ENAME` DESC"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -99,28 +82,19 @@ public void testReverseWithSortParserSuccess() { public void testDoubleReverseParserSuccess() { String ppl = "source=EMP | reverse | reverse"; RelNode root = getRelNode(ppl); + // Without optimization rule, shows consecutive sorts String expectedLogical = - "" - + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7])\n" - + " LogicalSort(sort0=[$8], dir0=[DESC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" - + " LogicalSort(sort0=[$8], dir0=[DESC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(sort0=[$0], dir0=[ASC])\n" + + " LogicalSort(sort0=[$0], dir0=[DESC])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__reverse_row_num__`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__reverse_row_num__`\n" + "SELECT *\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + "FROM `scott`.`EMP`\n" - + "ORDER BY 9 DESC NULLS FIRST) `t0`\n" - + "ORDER BY 9 DESC NULLS FIRST) `t2`"; + + "ORDER BY `EMPNO` DESC NULLS FIRST) `t`\n" + + "ORDER BY `EMPNO` NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -129,13 +103,8 @@ public void testReverseWithHeadParserSuccess() { String ppl = "source=EMP | reverse | head 2"; RelNode root = getRelNode(ppl); String expectedLogical = - "" - + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7])\n" - + " LogicalSort(sort0=[$8], dir0=[DESC], fetch=[2])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(sort0=[$0], dir0=[DESC], fetch=[2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedResult = @@ -146,12 +115,7 @@ public void testReverseWithHeadParserSuccess() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__reverse_row_num__`\n" - + "FROM `scott`.`EMP`\n" - + "ORDER BY 9 DESC NULLS FIRST\n" - + "LIMIT 2) `t0`"; + "SELECT *\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `EMPNO` DESC NULLS FIRST\n" + "LIMIT 2"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -178,4 +142,114 @@ public void testReverseWithExpressionShouldFail() { String ppl = "source=EMP | reverse EMPNO + 1"; getRelNode(ppl); } + + @Test + public void testMultipleSortsWithReverseParserSuccess() { + String ppl = "source=EMP | sort + SAL | sort - ENAME | reverse"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" + + " LogicalSort(sort0=[$1], dir0=[DESC-nulls-last])\n" + + " LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `SAL`) `t`\n" + + "ORDER BY `ENAME` DESC) `t0`\n" + + "ORDER BY `ENAME`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMultiFieldSortWithReverseParserSuccess() { + String ppl = "source=EMP | sort + SAL, - ENAME | reverse"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(sort0=[$5], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n" + + " LogicalSort(sort0=[$5], sort1=[$1], dir0=[ASC-nulls-first]," + + " dir1=[DESC-nulls-last])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `SAL`, `ENAME` DESC) `t`\n" + + "ORDER BY `SAL` DESC, `ENAME`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testComplexMultiFieldSortWithReverseParserSuccess() { + String ppl = "source=EMP | sort DEPTNO, + SAL, - ENAME | reverse"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(sort0=[$7], sort1=[$5], sort2=[$1], dir0=[DESC-nulls-last]," + + " dir1=[DESC-nulls-last], dir2=[ASC-nulls-first])\n" + + " LogicalSort(sort0=[$7], sort1=[$5], sort2=[$1], dir0=[ASC-nulls-first]," + + " dir1=[ASC-nulls-first], dir2=[DESC-nulls-last])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `DEPTNO`, `SAL`, `ENAME` DESC) `t`\n" + + "ORDER BY `DEPTNO` DESC, `SAL` DESC, `ENAME`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReverseWithFieldsAndSortParserSuccess() { + String ppl = "source=EMP | fields ENAME, SAL, DEPTNO | sort + SAL | reverse"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(sort0=[$1], dir0=[DESC-nulls-last])\n" + + " LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" + + " LogicalProject(ENAME=[$1], SAL=[$5], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + + "FROM (SELECT `ENAME`, `SAL`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `SAL`) `t0`\n" + + "ORDER BY `SAL` DESC"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testHeadThenSortReverseNoOpt() { + // Tests fetch limit behavior: head 5 | sort field | reverse + // Should NOT be optimized to preserve "take first 5, then sort" semantics + String ppl = "source=EMP | head 5 | sort + SAL | reverse"; + RelNode root = getRelNode(ppl); + + // Should have three LogicalSort nodes: fetch=5, sort SAL, reverse + // Calcite's built-in optimization will handle the physical plan optimization + String expectedLogical = + "LogicalSort(sort0=[$5], dir0=[DESC-nulls-last])\n" + + " LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" + + " LogicalSort(fetch=[5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT *\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 5) `t`\n" + + "ORDER BY `SAL`) `t0`\n" + + "ORDER BY `SAL` DESC"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } From 24c5674ce16d2396c7e202553bfe3334b42b48fb Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 10 Nov 2025 15:05:31 -0800 Subject: [PATCH 04/10] fix IT Signed-off-by: Kai Huang --- .../remote/CalciteReverseCommandIT.java | 64 +++++++++++++++++-- .../calcite/explain_double_reverse_no_op.json | 6 -- .../explain_double_reverse_sort_no_op.json | 6 -- 3 files changed, 60 insertions(+), 16 deletions(-) delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_no_op.json delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_sort_no_op.json diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java index 5ff41bcb3f5..b9946f1b19d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java @@ -97,14 +97,70 @@ public void testReverseWithComplexPipeline() throws IOException { } @Test - public void testReverseWithMultipleSorts() throws IOException { - // Use the existing BANK data but with a simpler, more predictable query + public void testReverseWithDescendingSort() throws IOException { + // Test reverse with descending sort (- age) JSONObject result = executeQuery( String.format( - "source=%s | sort account_number | fields account_number | reverse | head 3", + "source=%s | sort - account_number | fields account_number | reverse", TEST_INDEX_BANK)); verifySchema(result, schema("account_number", "bigint")); - verifyDataRowsInOrder(result, rows(32), rows(25), rows(20)); + verifyDataRowsInOrder( + result, rows(1), rows(6), rows(13), rows(18), rows(20), rows(25), rows(32)); + } + + @Test + public void testReverseWithMixedSortDirections() throws IOException { + // Test reverse with mixed sort directions (- age, + firstname) + JSONObject result = + executeQuery( + String.format( + "source=%s | sort - account_number, + firstname | fields account_number, firstname" + + " | reverse", + TEST_INDEX_BANK)); + verifySchema(result, schema("account_number", "bigint"), schema("firstname", "string")); + verifyDataRowsInOrder( + result, + rows(1, "Amber JOHnny"), + rows(6, "Hattie"), + rows(13, "Nanette"), + rows(18, "Dale"), + rows(20, "Elinor"), + rows(25, "Virginia"), + rows(32, "Dillard")); + } + + @Test + public void testDoubleReverseWithDescendingSort() throws IOException { + // Test double reverse with descending sort (- age) + JSONObject result = + executeQuery( + String.format( + "source=%s | sort - account_number | fields account_number | reverse | reverse", + TEST_INDEX_BANK)); + verifySchema(result, schema("account_number", "bigint")); + verifyDataRowsInOrder( + result, rows(32), rows(25), rows(20), rows(18), rows(13), rows(6), rows(1)); + } + + @Test + public void testDoubleReverseWithMixedSortDirections() throws IOException { + // Test double reverse with mixed sort directions (- age, + firstname) + JSONObject result = + executeQuery( + String.format( + "source=%s | sort - account_number, + firstname | fields account_number, firstname" + + " | reverse | reverse", + TEST_INDEX_BANK)); + verifySchema(result, schema("account_number", "bigint"), schema("firstname", "string")); + verifyDataRowsInOrder( + result, + rows(32, "Dillard"), + rows(25, "Virginia"), + rows(20, "Elinor"), + rows(18, "Dale"), + rows(13, "Nanette"), + rows(6, "Hattie"), + rows(1, "Amber JOHnny")); } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_no_op.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_no_op.json deleted file mode 100644 index 57ecf12a092..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_no_op.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(cpu_usage=[$0], @timestamp=[$1])\n LogicalSort(sort0=[$8], dir0=[DESC])\n LogicalProject(cpu_usage=[$0], @timestamp=[$1], _id=[$2], _index=[$3], _score=[$4], _maxscore=[$5], _sort=[$6], _routing=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n LogicalSort(sort0=[$8], dir0=[DESC])\n LogicalProject(cpu_usage=[$0], @timestamp=[$1], _id=[$2], _index=[$3], _score=[$4], _maxscore=[$5], _sort=[$6], _routing=[$7], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n CalciteLogicalIndexScan(table=[[OpenSearch, events]])\n", - "physical": "EnumerableCalc(expr#0..3=[{inputs}], proj#0..1=[{exprs}])\n EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$3], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableSort(sort0=[$2], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[cpu_usage, @timestamp]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"cpu_usage\",\"@timestamp\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_sort_no_op.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_sort_no_op.json deleted file mode 100644 index e3ad90650d7..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_sort_no_op.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{\n \"age\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_last\"\n }\n}, {\n \"firstname.keyword\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"desc\",\"missing\":\"_last\"}},{\"firstname.keyword\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" - } -} From 80b8b1d6aa70c42b9de628281c2c73c8c4a16ca9 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 10 Nov 2025 15:21:51 -0800 Subject: [PATCH 05/10] update ExplainIT Signed-off-by: Kai Huang --- .../sql/calcite/remote/CalciteExplainIT.java | 18 +++++++++--------- .../calcite/explain_reverse_fallback.json | 6 ------ .../calcite/explain_reverse_fallback.yaml | 14 ++++++++++++++ .../explain_reverse_pushdown_multiple.json | 6 ------ .../explain_reverse_pushdown_multiple.yaml | 19 +++++++++++++++++++ .../explain_reverse_pushdown_single.json | 6 ------ .../explain_reverse_pushdown_single.yaml | 14 ++++++++++++++ .../explain_reverse_fallback.json | 6 ------ .../explain_reverse_fallback.yaml | 14 ++++++++++++++ .../explain_reverse_pushdown_multiple.json | 6 ------ .../explain_reverse_pushdown_multiple.yaml | 12 ++++++++++++ .../explain_reverse_pushdown_single.json | 6 ------ .../explain_reverse_pushdown_single.yaml | 12 ++++++++++++ 13 files changed, 94 insertions(+), 45 deletions(-) delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.yaml diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index a7e94374eff..5d9800c6d26 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -412,25 +412,25 @@ public void testFilterWithSearchCall() throws IOException { @Test public void testExplainWithReverse() throws IOException { String query = "source=opensearch-sql_test_index_account | reverse | head 5"; - var result = explainQueryToString(query); - String expected = loadExpectedPlan("explain_reverse_fallback.json"); - assertJsonEqualsIgnoreId(expected, result); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_reverse_fallback.yaml"); + assertYamlEqualsIgnoreId(expected, result); } @Test public void testExplainWithReversePushdown() throws IOException { String query = "source=opensearch-sql_test_index_account | sort - age | reverse"; - var result = explainQueryToString(query); - String expected = loadExpectedPlan("explain_reverse_pushdown_single.json"); - assertJsonEqualsIgnoreId(expected, result); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_reverse_pushdown_single.yaml"); + assertYamlEqualsIgnoreId(expected, result); } @Test public void testExplainWithReversePushdownMultipleFields() throws IOException { String query = "source=opensearch-sql_test_index_account | sort - age, + firstname | reverse"; - var result = explainQueryToString(query); - String expected = loadExpectedPlan("explain_reverse_pushdown_multiple.json"); - assertJsonEqualsIgnoreId(expected, result); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_reverse_pushdown_multiple.yaml"); + assertYamlEqualsIgnoreId(expected, result); } @Test diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.json deleted file mode 100644 index 8cf7b29bd62..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$17], dir0=[DESC], fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..11=[{inputs}], proj#0..10=[{exprs}])\n EnumerableLimit(fetch=[5])\n EnumerableSort(sort0=[$11], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.yaml new file mode 100644 index 00000000000..b8b3f865b6e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.yaml @@ -0,0 +1,14 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$17], dir0=[DESC], fetch=[5]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..11=[{inputs}], proj#0..10=[{exprs}]) + EnumerableLimit(fetch=[5]) + EnumerableSort(sort0=[$11], dir0=[DESC]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.json deleted file mode 100644 index f4d73f04866..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{\n \"age\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}, {\n \"firstname.keyword\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_last\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}},{\"firstname.keyword\":{\"order\":\"desc\",\"missing\":\"_last\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.yaml new file mode 100644 index 00000000000..2132340e162 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_multiple.yaml @@ -0,0 +1,19 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{ + "age" : { + "order" : "asc", + "missing" : "_first" + } + }, { + "firstname.keyword" : { + "order" : "desc", + "missing" : "_last" + } + }], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"sort":[{"age":{"order":"asc","missing":"_first"}},{"firstname.keyword":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.json deleted file mode 100644 index 491b20a4a01..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(sort0=[$8], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], dir0=[ASC-nulls-first])\n LogicalSort(sort0=[$8], dir0=[DESC-nulls-last])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{\n \"age\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_first\"\n }\n}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.yaml new file mode 100644 index 00000000000..33d7c0f0cf6 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_pushdown_single.yaml @@ -0,0 +1,14 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$8], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$8], dir0=[ASC-nulls-first]) + LogicalSort(sort0=[$8], dir0=[DESC-nulls-last]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{ + "age" : { + "order" : "asc", + "missing" : "_first" + } + }], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"sort":[{"age":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.json deleted file mode 100644 index 723d977fb9d..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$17], dir0=[DESC], fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..17=[{inputs}], proj#0..10=[{exprs}])\n EnumerableLimit(fetch=[5])\n EnumerableSort(sort0=[$17], dir0=[DESC])\n EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.yaml new file mode 100644 index 00000000000..326e9186c30 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.yaml @@ -0,0 +1,14 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$17], dir0=[DESC], fetch=[5]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..17=[{inputs}], proj#0..10=[{exprs}]) + EnumerableLimit(fetch=[5]) + EnumerableSort(sort0=[$17], dir0=[DESC]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json deleted file mode 100644 index 10915d929e6..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])\n EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.yaml new file mode 100644 index 00000000000..bdb37931ed3 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_multiple.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json deleted file mode 100644 index 03135221480..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(sort0=[$8], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10])\n LogicalSort(sort0=[$8], dir0=[ASC-nulls-first])\n LogicalSort(sort0=[$8], dir0=[DESC-nulls-last])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$8], dir0=[ASC-nulls-first])\n EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.yaml new file mode 100644 index 00000000000..a1ecb6c3b38 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_pushdown_single.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$8], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$8], dir0=[ASC-nulls-first]) + LogicalSort(sort0=[$8], dir0=[DESC-nulls-last]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$8], dir0=[ASC-nulls-first]) + EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) From 70e4a6dd0c21eb32f511bf1343625d20f8ee5ea1 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 10 Nov 2025 15:40:17 -0800 Subject: [PATCH 06/10] Add double reverse explain Signed-off-by: Kai Huang --- .../sql/calcite/remote/CalciteExplainIT.java | 25 +++++++++++++++++++ .../explain_double_reverse_fallback.yaml | 17 +++++++++++++ ...lain_double_reverse_pushdown_multiple.yaml | 20 +++++++++++++++ ...xplain_double_reverse_pushdown_single.yaml | 15 +++++++++++ .../explain_double_reverse_fallback.yaml | 17 +++++++++++++ ...lain_double_reverse_pushdown_multiple.yaml | 13 ++++++++++ ...xplain_double_reverse_pushdown_single.yaml | 13 ++++++++++ 7 files changed, 120 insertions(+) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_fallback.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_pushdown_multiple.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_pushdown_single.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_fallback.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_pushdown_multiple.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_pushdown_single.yaml diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 5d9800c6d26..5b066a0093f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -433,6 +433,31 @@ public void testExplainWithReversePushdownMultipleFields() throws IOException { assertYamlEqualsIgnoreId(expected, result); } + @Test + public void testExplainWithDoubleReverse() throws IOException { + String query = "source=opensearch-sql_test_index_account | reverse | reverse"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_double_reverse_fallback.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testExplainWithDoubleReversePushdown() throws IOException { + String query = "source=opensearch-sql_test_index_account | sort - age | reverse | reverse"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_double_reverse_pushdown_single.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testExplainWithDoubleReversePushdownMultipleFields() throws IOException { + String query = + "source=opensearch-sql_test_index_account | sort - age, + firstname | reverse | reverse"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_double_reverse_pushdown_multiple.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + @Test public void testExplainWithTimechartAvg() throws IOException { var result = explainQueryYaml("source=events | timechart span=1m avg(cpu_usage) by host"); diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_fallback.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_fallback.yaml new file mode 100644 index 00000000000..bdfb488f88e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_fallback.yaml @@ -0,0 +1,17 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$17], dir0=[DESC]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()]) + LogicalSort(sort0=[$17], dir0=[DESC]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableCalc(expr#0..18=[{inputs}], proj#0..10=[{exprs}]) + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$18], dir0=[DESC]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableSort(sort0=[$17], dir0=[DESC]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_pushdown_multiple.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_pushdown_multiple.yaml new file mode 100644 index 00000000000..fb556df43f6 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_pushdown_multiple.yaml @@ -0,0 +1,20 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first]) + LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{ + "age" : { + "order" : "desc", + "missing" : "_last" + } + }, { + "firstname.keyword" : { + "order" : "asc", + "missing" : "_first" + } + }], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"sort":[{"age":{"order":"desc","missing":"_last"}},{"firstname.keyword":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_pushdown_single.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_pushdown_single.yaml new file mode 100644 index 00000000000..0f0843b2964 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_pushdown_single.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$8], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$8], dir0=[DESC-nulls-last]) + LogicalSort(sort0=[$8], dir0=[ASC-nulls-first]) + LogicalSort(sort0=[$8], dir0=[DESC-nulls-last]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SORT->[{ + "age" : { + "order" : "desc", + "missing" : "_last" + } + }], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"sort":[{"age":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_fallback.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_fallback.yaml new file mode 100644 index 00000000000..bdfb488f88e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_fallback.yaml @@ -0,0 +1,17 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$17], dir0=[DESC]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()]) + LogicalSort(sort0=[$17], dir0=[DESC]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableCalc(expr#0..18=[{inputs}], proj#0..10=[{exprs}]) + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$18], dir0=[DESC]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableSort(sort0=[$17], dir0=[DESC]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_pushdown_multiple.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_pushdown_multiple.yaml new file mode 100644 index 00000000000..1bce5d3a0da --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_pushdown_multiple.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first]) + LogicalSort(sort0=[$8], sort1=[$1], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last]) + LogicalSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$8], sort1=[$1], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first]) + EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_pushdown_single.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_pushdown_single.yaml new file mode 100644 index 00000000000..c63f25b8986 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_pushdown_single.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$8], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(sort0=[$8], dir0=[DESC-nulls-last]) + LogicalSort(sort0=[$8], dir0=[ASC-nulls-first]) + LogicalSort(sort0=[$8], dir0=[DESC-nulls-last]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$8], dir0=[DESC-nulls-last]) + EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) From fd86feeae7691c15f920bcae534dfe1caa10cd48 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 10 Nov 2025 16:44:20 -0800 Subject: [PATCH 07/10] fix ExplainIT Signed-off-by: Kai Huang --- .../calcite/explain_double_reverse_fallback.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_fallback.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_fallback.yaml index bdfb488f88e..27ec6e29da4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_fallback.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_fallback.yaml @@ -8,10 +8,10 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..18=[{inputs}], proj#0..10=[{exprs}]) + EnumerableCalc(expr#0..12=[{inputs}], proj#0..10=[{exprs}]) EnumerableLimit(fetch=[10000]) - EnumerableSort(sort0=[$18], dir0=[DESC]) + EnumerableSort(sort0=[$12], dir0=[DESC]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - EnumerableSort(sort0=[$17], dir0=[DESC]) + EnumerableSort(sort0=[$11], dir0=[DESC]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file From 39aecf621eb1b86ea57c84fc5660dabe66c03bd1 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 12 Nov 2025 13:57:19 -0800 Subject: [PATCH 08/10] udpated implementation: ignore reverse if no collation/@timestamp found Signed-off-by: Kai Huang --- .../sql/calcite/CalciteRelNodeVisitor.java | 22 +++--- .../sql/calcite/remote/CalciteExplainIT.java | 19 +++-- .../remote/CalciteReverseCommandIT.java | 71 +++++++++++++++++-- .../explain_double_reverse_fallback.yaml | 17 ----- .../explain_double_reverse_ignored.yaml | 7 ++ .../calcite/explain_reverse_fallback.yaml | 14 ---- .../calcite/explain_reverse_ignored.yaml | 8 +++ .../explain_reverse_with_timestamp.yaml | 13 ++++ .../explain_double_reverse_fallback.yaml | 17 ----- .../explain_double_reverse_ignored.yaml | 9 +++ .../explain_reverse_fallback.yaml | 14 ---- .../explain_reverse_ignored.yaml | 11 +++ .../explain_reverse_with_timestamp.yaml | 12 ++++ .../ppl/calcite/CalcitePPLReverseTest.java | 13 ++++ 14 files changed, 163 insertions(+), 84 deletions(-) delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_fallback.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_ignored.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_ignored.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_with_timestamp.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_fallback.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_ignored.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_ignored.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_with_timestamp.yaml diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 24bab0b7445..ac92ac8d73e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -669,8 +669,6 @@ public RelNode visitHead(Head node, CalcitePlanContext context) { return context.relBuilder.peek(); } - private static final String REVERSE_ROW_NUM = "__reverse_row_num__"; - @Override public RelNode visitReverse( org.opensearch.sql.ast.tree.Reverse node, CalcitePlanContext context) { @@ -686,17 +684,15 @@ public RelNode visitReverse( RelCollation reversedCollation = PlanUtils.reverseCollation(collation); context.relBuilder.sort(reversedCollation); } else { - // Fallback: use ROW_NUMBER approach when no existing sort - RexNode rowNumber = - context - .relBuilder - .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) - .over() - .rowsTo(RexWindowBounds.CURRENT_ROW) - .as(REVERSE_ROW_NUM); - context.relBuilder.projectPlus(rowNumber); - context.relBuilder.sort(context.relBuilder.desc(context.relBuilder.field(REVERSE_ROW_NUM))); - context.relBuilder.projectExcept(context.relBuilder.field(REVERSE_ROW_NUM)); + // Check if @timestamp field exists in the row type + List fieldNames = context.relBuilder.peek().getRowType().getFieldNames(); + if (fieldNames.contains(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP)) { + // If @timestamp exists, sort by it in descending order + context.relBuilder.sort( + context.relBuilder.desc( + context.relBuilder.field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP))); + } + // If neither collation nor @timestamp exists, ignore the reverse command (no-op) } return context.relBuilder.peek(); diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 5b066a0093f..89ee5fddc3e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -410,10 +410,11 @@ public void testFilterWithSearchCall() throws IOException { } @Test - public void testExplainWithReverse() throws IOException { + public void testExplainWithReverseIgnored() throws IOException { + // Reverse is ignored when there's no existing sort and no @timestamp field String query = "source=opensearch-sql_test_index_account | reverse | head 5"; var result = explainQueryYaml(query); - String expected = loadExpectedPlan("explain_reverse_fallback.yaml"); + String expected = loadExpectedPlan("explain_reverse_ignored.yaml"); assertYamlEqualsIgnoreId(expected, result); } @@ -434,10 +435,11 @@ public void testExplainWithReversePushdownMultipleFields() throws IOException { } @Test - public void testExplainWithDoubleReverse() throws IOException { + public void testExplainWithDoubleReverseIgnored() throws IOException { + // Double reverse is ignored when there's no existing sort and no @timestamp field String query = "source=opensearch-sql_test_index_account | reverse | reverse"; var result = explainQueryYaml(query); - String expected = loadExpectedPlan("explain_double_reverse_fallback.yaml"); + String expected = loadExpectedPlan("explain_double_reverse_ignored.yaml"); assertYamlEqualsIgnoreId(expected, result); } @@ -458,6 +460,15 @@ public void testExplainWithDoubleReversePushdownMultipleFields() throws IOExcept assertYamlEqualsIgnoreId(expected, result); } + @Test + public void testExplainReverseWithTimestamp() throws IOException { + // Test that reverse with @timestamp field sorts by @timestamp DESC + String query = "source=opensearch-sql_test_index_time_data | reverse | head 5"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_reverse_with_timestamp.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + @Test public void testExplainWithTimechartAvg() throws IOException { var result = explainQueryYaml("source=events | timechart span=1m avg(cpu_usage) by host"); diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java index b9946f1b19d..3d1845fcd0d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java @@ -6,6 +6,7 @@ package org.opensearch.sql.calcite.remote; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_TIME_DATA; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRowsInOrder; @@ -24,12 +25,16 @@ public void init() throws Exception { enableCalcite(); disallowCalciteFallback(); loadIndex(Index.BANK); + loadIndex(Index.TIME_TEST_DATA); } @Test public void testReverse() throws IOException { JSONObject result = - executeQuery(String.format("source=%s | fields account_number | reverse", TEST_INDEX_BANK)); + executeQuery( + String.format( + "source=%s | fields account_number | sort account_number | reverse", + TEST_INDEX_BANK)); verifySchema(result, schema("account_number", "bigint")); verifyDataRowsInOrder( result, rows(32), rows(25), rows(20), rows(18), rows(13), rows(6), rows(1)); @@ -40,7 +45,8 @@ public void testReverseWithFields() throws IOException { JSONObject result = executeQuery( String.format( - "source=%s | fields account_number, firstname | reverse", TEST_INDEX_BANK)); + "source=%s | fields account_number, firstname | sort account_number | reverse", + TEST_INDEX_BANK)); verifySchema(result, schema("account_number", "bigint"), schema("firstname", "string")); verifyDataRowsInOrder( result, @@ -70,7 +76,8 @@ public void testDoubleReverse() throws IOException { JSONObject result = executeQuery( String.format( - "source=%s | fields account_number | reverse | reverse", TEST_INDEX_BANK)); + "source=%s | fields account_number | sort account_number | reverse | reverse", + TEST_INDEX_BANK)); verifySchema(result, schema("account_number", "bigint")); verifyDataRowsInOrder( result, rows(1), rows(6), rows(13), rows(18), rows(20), rows(25), rows(32)); @@ -80,7 +87,9 @@ public void testDoubleReverse() throws IOException { public void testReverseWithHead() throws IOException { JSONObject result = executeQuery( - String.format("source=%s | fields account_number | reverse | head 3", TEST_INDEX_BANK)); + String.format( + "source=%s | fields account_number | sort account_number | reverse | head 3", + TEST_INDEX_BANK)); verifySchema(result, schema("account_number", "bigint")); verifyDataRowsInOrder(result, rows(32), rows(25), rows(20)); } @@ -90,7 +99,8 @@ public void testReverseWithComplexPipeline() throws IOException { JSONObject result = executeQuery( String.format( - "source=%s | where account_number > 18 | fields account_number | reverse | head 2", + "source=%s | where account_number > 18 | fields account_number | sort" + + " account_number | reverse | head 2", TEST_INDEX_BANK)); verifySchema(result, schema("account_number", "bigint")); verifyDataRowsInOrder(result, rows(32), rows(25)); @@ -163,4 +173,55 @@ public void testDoubleReverseWithMixedSortDirections() throws IOException { rows(6, "Hattie"), rows(1, "Amber JOHnny")); } + + @Test + public void testReverseIgnoredWithoutSortOrTimestamp() throws IOException { + // Test that reverse is ignored when there's no explicit sort and no @timestamp field + // BANK index doesn't have @timestamp, so reverse should be ignored + JSONObject result = + executeQuery( + String.format("source=%s | fields account_number | reverse | head 3", TEST_INDEX_BANK)); + verifySchema(result, schema("account_number", "bigint")); + // Without sort or @timestamp, reverse is ignored, so data comes in natural order + // The first 3 documents in natural order (ascending by account_number) + verifyDataRowsInOrder(result, rows(1), rows(6), rows(13)); + } + + @Test + public void testReverseWithTimestampField() throws IOException { + // Test that reverse with @timestamp field sorts by @timestamp DESC + // TIME_TEST_DATA index has @timestamp field + JSONObject result = + executeQuery( + String.format( + "source=%s | fields value, category, `@timestamp` | reverse | head 5", + TEST_INDEX_TIME_DATA)); + verifySchema( + result, + schema("value", "int"), + schema("category", "string"), + schema("@timestamp", "timestamp")); + // Should return the latest 5 records (highest @timestamp values) in descending order + // Based on the test data, these are IDs 100, 99, 98, 97, 96 + verifyDataRowsInOrder( + result, + rows(8762, "A", "2025-08-01 03:47:41"), + rows(7348, "C", "2025-08-01 02:00:56"), + rows(9015, "B", "2025-08-01 01:14:11"), + rows(6489, "D", "2025-08-01 00:27:26"), + rows(8676, "A", "2025-07-31 23:40:33")); + } + + @Test + public void testReverseWithTimestampAndExplicitSort() throws IOException { + // Test that explicit sort takes precedence over @timestamp + JSONObject result = + executeQuery( + String.format( + "source=%s | fields value, category | sort value | reverse | head 3", + TEST_INDEX_TIME_DATA)); + verifySchema(result, schema("value", "int"), schema("category", "string")); + // Should reverse the value sort, giving us the highest values + verifyDataRowsInOrder(result, rows(9521, "B"), rows(9367, "A"), rows(9321, "A")); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_fallback.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_fallback.yaml deleted file mode 100644 index 27ec6e29da4..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_fallback.yaml +++ /dev/null @@ -1,17 +0,0 @@ -calcite: - logical: | - LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) - LogicalSort(sort0=[$17], dir0=[DESC]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()]) - LogicalSort(sort0=[$17], dir0=[DESC]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - physical: | - EnumerableCalc(expr#0..12=[{inputs}], proj#0..10=[{exprs}]) - EnumerableLimit(fetch=[10000]) - EnumerableSort(sort0=[$12], dir0=[DESC]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - EnumerableSort(sort0=[$11], dir0=[DESC]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_ignored.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_ignored.yaml new file mode 100644 index 00000000000..24d8c63fcd0 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_double_reverse_ignored.yaml @@ -0,0 +1,7 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.yaml deleted file mode 100644 index b8b3f865b6e..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_fallback.yaml +++ /dev/null @@ -1,14 +0,0 @@ -calcite: - logical: | - LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) - LogicalSort(sort0=[$17], dir0=[DESC], fetch=[5]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - physical: | - EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..11=[{inputs}], proj#0..10=[{exprs}]) - EnumerableLimit(fetch=[5]) - EnumerableSort(sort0=[$11], dir0=[DESC]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_ignored.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_ignored.yaml new file mode 100644 index 00000000000..083010dd70e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_ignored.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(fetch=[5]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_with_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_with_timestamp.yaml new file mode 100644 index 00000000000..7c383f34584 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_reverse_with_timestamp.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[DESC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(@timestamp=[$0], category=[$1], value=[$2], timestamp=[$3]) + LogicalSort(sort0=[$0], dir0=[DESC], fetch=[5]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp, category, value, timestamp], SORT->[{ + "@timestamp" : { + "order" : "desc", + "missing" : "_first" + } + }], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["@timestamp","category","value","timestamp"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_first"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_fallback.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_fallback.yaml deleted file mode 100644 index bdfb488f88e..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_fallback.yaml +++ /dev/null @@ -1,17 +0,0 @@ -calcite: - logical: | - LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) - LogicalSort(sort0=[$17], dir0=[DESC]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()]) - LogicalSort(sort0=[$17], dir0=[DESC]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - physical: | - EnumerableCalc(expr#0..18=[{inputs}], proj#0..10=[{exprs}]) - EnumerableLimit(fetch=[10000]) - EnumerableSort(sort0=[$18], dir0=[DESC]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - EnumerableSort(sort0=[$17], dir0=[DESC]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_ignored.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_ignored.yaml new file mode 100644 index 00000000000..79f52ecc188 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_double_reverse_ignored.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.yaml deleted file mode 100644 index 326e9186c30..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_fallback.yaml +++ /dev/null @@ -1,14 +0,0 @@ -calcite: - logical: | - LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) - LogicalSort(sort0=[$17], dir0=[DESC], fetch=[5]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __reverse_row_num__=[ROW_NUMBER() OVER ()]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - physical: | - EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..17=[{inputs}], proj#0..10=[{exprs}]) - EnumerableLimit(fetch=[5]) - EnumerableSort(sort0=[$17], dir0=[DESC]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_ignored.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_ignored.yaml new file mode 100644 index 00000000000..0fb2d7e597d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_ignored.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalSort(fetch=[5]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}]) + EnumerableLimit(fetch=[5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_with_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_with_timestamp.yaml new file mode 100644 index 00000000000..e3095d13abc --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_reverse_with_timestamp.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[DESC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(@timestamp=[$0], category=[$1], value=[$2], timestamp=[$3]) + LogicalSort(sort0=[$0], dir0=[DESC], fetch=[5]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..9=[{inputs}], proj#0..3=[{exprs}]) + EnumerableLimit(fetch=[5]) + EnumerableSort(sort0=[$0], dir0=[DESC]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java index 7e732648185..3fd518d9431 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReverseTest.java @@ -9,6 +9,18 @@ import org.apache.calcite.test.CalciteAssert; import org.junit.Test; +/** + * Tests for reverse command optimization. + * + *

The reverse command behavior depends on the presence of: 1. Existing collation (sort): Reverse + * the sort direction 2. @timestamp field: Sort by @timestamp DESC 3. Neither: No-op (ignore reverse + * command) + * + *

These tests use SCOTT_WITH_TEMPORAL schema where EMP table has a default collation on EMPNO + * (primary key), demonstrating case #1 (reverse existing collation). + * + *

For @timestamp and no-op cases, see CalciteReverseCommandIT integration tests. + */ public class CalcitePPLReverseTest extends CalcitePPLAbstractTest { public CalcitePPLReverseTest() { super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); @@ -16,6 +28,7 @@ public CalcitePPLReverseTest() { @Test public void testReverseParserSuccess() { + // EMP table has default collation on EMPNO, so reverse flips it to DESC String ppl = "source=EMP | reverse"; RelNode root = getRelNode(ppl); String expectedLogical = From 5ea90dd7d95f9388fae7f314e9d9aa76b98a2641 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 12 Nov 2025 14:14:18 -0800 Subject: [PATCH 09/10] Update doc Signed-off-by: Kai Huang --- docs/category.json | 1 + docs/user/ppl/cmd/reverse.rst | 90 ++++++++++++++++++++++++----------- 2 files changed, 64 insertions(+), 27 deletions(-) diff --git a/docs/category.json b/docs/category.json index f126904da6a..dcfb07f78ff 100644 --- a/docs/category.json +++ b/docs/category.json @@ -42,6 +42,7 @@ "user/ppl/cmd/rename.rst", "user/ppl/cmd/multisearch.rst", "user/ppl/cmd/replace.rst", + "user/ppl/cmd/reverse.rst", "user/ppl/cmd/rex.rst", "user/ppl/cmd/search.rst", "user/ppl/cmd/showdatasources.rst", diff --git a/docs/user/ppl/cmd/reverse.rst b/docs/user/ppl/cmd/reverse.rst index 2efe833855f..bd0876be3b7 100644 --- a/docs/user/ppl/cmd/reverse.rst +++ b/docs/user/ppl/cmd/reverse.rst @@ -11,7 +11,21 @@ reverse Description ============ -| Using ``reverse`` command to reverse the display order of search results. The same results are returned, but in reverse order. +| Using ``reverse`` command to reverse the display order of search results. The behavior depends on the query context: +| +| **1. With existing sort**: Reverses the sort direction(s) +| **2. With @timestamp field (no explicit sort)**: Sorts by @timestamp in descending order +| **3. Without sort or @timestamp**: The command is ignored (no effect) + +Behavior +======== +The ``reverse`` command follows a three-tier logic: + +1. **If there's an explicit sort command before reverse**: The reverse command flips all sort directions (ASC ↔ DESC) +2. **If no explicit sort but the index has an @timestamp field**: The reverse command sorts by @timestamp in descending order (most recent first) +3. **If neither condition is met**: The reverse command is ignored and has no effect on the result order + +This design optimizes performance by avoiding expensive operations when reverse has no meaningful semantic interpretation. Version ======= @@ -26,16 +40,16 @@ reverse Note ===== -The `reverse` command processes the entire dataset. If applied directly to millions of records, it will consume significant memory resources on the coordinating node. Users should only apply the `reverse` command to smaller datasets, typically after aggregation operations. +The ``reverse`` command is optimized to avoid unnecessary memory consumption. When applied without an explicit sort or @timestamp field, it is ignored. When used with an explicit sort, it efficiently reverses the sort direction(s) without materializing the entire dataset. -Example 1: Basic reverse operation -================================== +Example 1: Reverse with explicit sort +====================================== -The example shows reversing the order of all documents. +The example shows reversing results after sorting by age in ascending order, effectively giving descending order. PPL query:: - os> source=accounts | fields account_number, age | reverse; + os> source=accounts | sort age | fields account_number, age | reverse; fetched rows / total rows = 4/4 +----------------+-----+ | account_number | age | @@ -47,33 +61,52 @@ PPL query:: +----------------+-----+ -Example 2: Reverse with sort -============================ +Example 2: Reverse with @timestamp field +========================================= -The example shows reversing results after sorting by age in ascending order, effectively giving descending order. +The example shows reverse on a time-series index automatically sorts by @timestamp in descending order (most recent first). PPL query:: - os> source=accounts | sort age | fields account_number, age | reverse; - fetched rows / total rows = 4/4 + os> source=time_test | fields value, @timestamp | reverse | head 3; + fetched rows / total rows = 3/3 + +-------+---------------------+ + | value | @timestamp | + |-------+---------------------| + | 9243 | 2025-07-28 09:41:29 | + | 7654 | 2025-07-28 08:22:11 | + | 8321 | 2025-07-28 07:05:33 | + +-------+---------------------+ + +Note: When the index contains an @timestamp field and no explicit sort is specified, reverse will sort by @timestamp DESC to show the most recent events first. This is particularly useful for log analysis and time-series data. + +Example 3: Reverse ignored (no sort, no @timestamp) +=================================================== + +The example shows that reverse is ignored when there's no explicit sort and no @timestamp field. + +PPL query:: + + os> source=accounts | fields account_number, age | reverse | head 2; + fetched rows / total rows = 2/2 +----------------+-----+ | account_number | age | |----------------+-----| - | 6 | 36 | - | 18 | 33 | | 1 | 32 | - | 13 | 28 | + | 6 | 36 | +----------------+-----+ +Note: Results appear in natural order (same as without reverse) because accounts index has no @timestamp field and no explicit sort was specified. + -Example 3: Reverse with head -============================ +Example 4: Reverse with sort and head +===================================== -The example shows using reverse with head to get the last 2 records from the original order. +The example shows using reverse with sort and head to get the top 2 records by age. PPL query:: - os> source=accounts | reverse | head 2 | fields account_number, age; + os> source=accounts | sort age | reverse | head 2 | fields account_number, age; fetched rows / total rows = 2/2 +----------------+-----+ | account_number | age | @@ -83,14 +116,14 @@ PPL query:: +----------------+-----+ -Example 4: Double reverse -========================= +Example 5: Double reverse with sort +=================================== -The example shows that applying reverse twice returns to the original order. +The example shows that applying reverse twice with an explicit sort returns to the original sort order. PPL query:: - os> source=accounts | reverse | reverse | fields account_number, age; + os> source=accounts | sort age | reverse | reverse | fields account_number, age; fetched rows / total rows = 4/4 +----------------+-----+ | account_number | age | @@ -102,19 +135,22 @@ PPL query:: +----------------+-----+ -Example 5: Reverse with complex pipeline -======================================= +Example 6: Reverse with multiple sort fields +============================================ -The example shows reverse working with filtering and field selection. +The example shows reverse flipping all sort directions when multiple fields are sorted. PPL query:: - os> source=accounts | where age > 30 | fields account_number, age | reverse; - fetched rows / total rows = 3/3 + os> source=accounts | sort +age, -account_number | reverse | fields account_number, age; + fetched rows / total rows = 4/4 +----------------+-----+ | account_number | age | |----------------+-----| | 6 | 36 | | 18 | 33 | | 1 | 32 | + | 13 | 28 | +----------------+-----+ + +Note: Original sort is ASC age, DESC account_number. After reverse, it becomes DESC age, ASC account_number. From 1dbb253978b27a470bc6c8d14a5ecbd43c6b49ec Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 13 Nov 2025 10:37:18 -0800 Subject: [PATCH 10/10] add IT for streamstats Signed-off-by: Kai Huang --- .../remote/CalciteReverseCommandIT.java | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java index 3d1845fcd0d..1de15b24b72 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReverseCommandIT.java @@ -6,6 +6,7 @@ package org.opensearch.sql.calcite.remote; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STATE_COUNTRY; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_TIME_DATA; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; @@ -26,6 +27,7 @@ public void init() throws Exception { disallowCalciteFallback(); loadIndex(Index.BANK); loadIndex(Index.TIME_TEST_DATA); + loadIndex(Index.STATE_COUNTRY); } @Test @@ -224,4 +226,113 @@ public void testReverseWithTimestampAndExplicitSort() throws IOException { // Should reverse the value sort, giving us the highest values verifyDataRowsInOrder(result, rows(9521, "B"), rows(9367, "A"), rows(9321, "A")); } + + @Test + public void testStreamstatsWithReverse() throws IOException { + // Test that reverse is ignored when used directly after streamstats + // streamstats maintains order via __stream_seq__, but this field is projected out + // and doesn't create a detectable collation, so reverse is ignored (no-op) + JSONObject result = + executeQuery( + String.format( + "source=%s | streamstats count() as cnt, avg(age) as avg | reverse", + TEST_INDEX_STATE_COUNTRY)); + verifySchema( + result, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("cnt", "bigint"), + schema("avg", "double")); + // Reverse is ignored, so data remains in original streamstats order + verifyDataRowsInOrder( + result, + rows("Jake", "USA", "California", 4, 2023, 70, 1, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 2, 50), + rows("John", "Canada", "Ontario", 4, 2023, 25, 3, 41.666666666666664), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 4, 36.25)); + } + + @Test + public void testStreamstatsWindowWithReverse() throws IOException { + // Test that reverse is ignored after streamstats with window + JSONObject result = + executeQuery( + String.format( + "source=%s | streamstats window=2 avg(age) as avg | reverse", + TEST_INDEX_STATE_COUNTRY)); + verifySchema( + result, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("avg", "double")); + // Reverse is ignored, data remains in original order + // Window=2 means average of current and previous row (sliding window of size 2) + verifyDataRowsInOrder( + result, + rows("Jake", "USA", "California", 4, 2023, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 50), + rows("John", "Canada", "Ontario", 4, 2023, 25, 27.5), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 22.5)); + } + + @Test + public void testStreamstatsByWithReverse() throws IOException { + // Test that reverse is ignored after streamstats with partitioning (by clause) + JSONObject result = + executeQuery( + String.format( + "source=%s | streamstats count() as cnt, avg(age) as avg by country | reverse", + TEST_INDEX_STATE_COUNTRY)); + verifySchema( + result, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("cnt", "bigint"), + schema("avg", "double")); + // Reverse is ignored, data remains in original order + verifyDataRowsInOrder( + result, + rows("Jake", "USA", "California", 4, 2023, 70, 1, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 2, 50), + rows("John", "Canada", "Ontario", 4, 2023, 25, 1, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 2, 22.5)); + } + + @Test + public void testStreamstatsWithSortThenReverse() throws IOException { + // Test that reverse works when there's an explicit sort after streamstats + // The explicit sort creates a collation that reverse can detect and reverse + JSONObject result = + executeQuery( + String.format( + "source=%s | streamstats count() as cnt | sort age | reverse | head 3", + TEST_INDEX_STATE_COUNTRY)); + verifySchema( + result, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("cnt", "bigint")); + // With explicit sort and reverse, data is in descending age order + verifyDataRowsInOrder( + result, + rows("Jake", "USA", "California", 4, 2023, 70, 1), + rows("Hello", "USA", "New York", 4, 2023, 30, 2), + rows("John", "Canada", "Ontario", 4, 2023, 25, 3)); + } }