diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index ded7ba541a4..633472d958f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -68,6 +68,9 @@ public interface PlanUtils { String ROW_NUMBER_COLUMN_FOR_STREAMSTATS = "__stream_seq__"; String ROW_NUMBER_COLUMN_FOR_CHART = "_row_number_chart_"; + String DIRECTION = "DIRECTION"; + String NULL_DIRECTION = "NULL_DIRECTION"; + static SpanUnit intervalUnitToSpanUnit(IntervalUnit unit) { return switch (unit) { case MICROSECOND -> SpanUnit.MICROSECOND; @@ -532,6 +535,23 @@ static boolean sortByFieldsOnly(Sort sort) { return !sort.getCollation().getFieldCollations().isEmpty() && sort.fetch == null; } + /** + * Check if the sort collation points to non field project expression. + * + * @param sort the sort operator adding sort order over project + * @param project project operation that may contain non field expressions + * @return flag to indicate whether non field project expression will be sorted + */ + static boolean sortReferencesExpr(Sort sort, Project project) { + if (sort.getCollation().getFieldCollations().isEmpty()) { + return false; + } + return sort.getCollation().getFieldCollations().stream() + .anyMatch( + relFieldCollation -> + project.getProjects().get(relFieldCollation.getFieldIndex()) instanceof RexCall); + } + /** * Get a string representation of the argument types expressed in ExprType for error messages. * diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 994e583eaa8..6c94be0c12d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -870,6 +870,80 @@ public void testSimpleSortExpressionPushDownWithOnlyExprProjected() throws Excep assertJsonEqualsIgnoreId(expected, result); } + @Test + public void testComplexSortExpressionPushDownExplain() throws Exception { + String query = + "source=opensearch-sql_test_index_bank| eval age2 = age + balance | sort age2 | fields age," + + " age2"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_complex_sort_expr_push.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testComplexSortExpressionPushDownWithOnlyExprProjected() throws Exception { + String query = + "source=opensearch-sql_test_index_bank| eval age2 = age + balance | sort age2 | fields" + + " age2"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_complex_sort_expr_single_expr_output_push.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testComplexSortExpressionPushDownWithoutExprProjected() throws Exception { + String query = + "source=opensearch-sql_test_index_bank| eval age2 = age + balance | sort age2 | fields age"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_complex_sort_expr_no_expr_output_push.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testComplexSortExpressionProjectThenSort() throws Exception { + String query = + "source=opensearch-sql_test_index_bank| eval age2 = age + balance | fields age, age2 | sort" + + " age2"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_complex_sort_expr_project_then_sort.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + /* + * TODO: A potential optimization is to leverage RexSimplify to simplify -(+($10, $7), $10) to $7 + * Above simplification can only work when $10 is nonnull and there is no precision loss of + * expression calculation + */ + @Test + public void testSortNestedComplexExpression() throws Exception { + String query = + "source=opensearch-sql_test_index_bank| eval age2 = age + balance, age3 = age2 - age | sort" + + " age3"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_complex_sort_nested_expr.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testSortComplexExpressionThenSortField() throws Exception { + String query = + "source=opensearch-sql_test_index_bank| eval age2 = age + balance | sort age2, age | eval" + + " balance2 = abs(balance) | sort age"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_complex_sort_then_field_sort.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testSortComplexExprMixedWithSimpleExpr() throws Exception { + String query = + "source=opensearch-sql_test_index_bank| eval age2 = age + balance, balance2 = balance + 1 |" + + " sort age2, balance2 "; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_sort_complex_and_simple_expr.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + @Test public void testRexExplain() throws IOException { String query = diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/SortCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/SortCommandIT.java index b760a9c5546..a9001f5c995 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/SortCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/SortCommandIT.java @@ -10,7 +10,9 @@ import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyOrder; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; import java.io.IOException; import java.util.ArrayList; @@ -321,4 +323,95 @@ public void testHeadThenSort() throws IOException { verifyOrder(result, rows(28), rows(32)); } } + + @Test + public void testSortComplexExpression() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | eval age2 = age + balance | sort age2 | fields age, balance, age2", + TEST_INDEX_BANK)); + verifyOrder( + result, + rows(33, 4180, 4213), + rows(36, 5686, 5722), + rows(36, 16418, 16454), + rows(28, 32838, 32866), + rows(32, 39225, 39257), + rows(39, 40540, 40579), + rows(34, 48086, 48120)); + } + + @Test + public void testSortComplexExpressionThenHead() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | eval age2 = age + balance | sort age2 | fields age, balance, age2 |" + + " head 2", + TEST_INDEX_BANK)); + verifyOrder(result, rows(33, 4180, 4213), rows(36, 5686, 5722)); + } + + @Test + public void testPushdownSortStringExpression() throws IOException { + String ppl = + String.format( + "source=%s | eval firstname2 = substring(firstname, 1, 3) | sort firstname2 | fields" + + " firstname2, firstname", + TEST_INDEX_BANK_WITH_NULL_VALUES); + + JSONObject result = executeQuery(ppl); + verifySchema(result, schema("firstname2", "string"), schema("firstname", "string")); + verifyOrder( + result, + rows("Amb", "Amber JOHnny"), + rows("Dal", "Dale"), + rows("Dil", "Dillard"), + rows("Eli", "Elinor"), + rows("Hat", "Hattie"), + rows("Nan", "Nanette"), + rows("Vir", "Virginia")); + } + + @Test + public void testPushdownSortExpressionContainsNull() throws IOException { + String ppl = + String.format( + "source=%s | eval balance2 = abs(balance) | sort -balance2 | fields balance, balance2", + TEST_INDEX_BANK_WITH_NULL_VALUES); + + JSONObject result = executeQuery(ppl); + verifySchema(result, schema("balance", "bigint"), schema("balance2", "bigint")); + verifyOrder( + result, + rows(48086, 48086), + rows(39225, 39225), + rows(32838, 32838), + rows(4180, 4180), + rows(null, null), + rows(null, null), + rows(null, null)); + } + + @Test + public void testPushdownSortExpressionWithMixedFieldSort() throws IOException { + String ppl = + String.format( + "source=%s | eval balance2 = abs(balance) | sort -balance2, account_number | fields" + + " balance2, account_number", + TEST_INDEX_BANK_WITH_NULL_VALUES); + + JSONObject result = executeQuery(ppl); + verifySchema(result, schema("balance2", "bigint"), schema("account_number", "bigint")); + verifyOrder( + result, + rows(48086, 32), + rows(39225, 1), + rows(32838, 13), + rows(4180, 18), + rows(null, 6), + rows(null, 20), + rows(null, 25)); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_no_expr_output_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_no_expr_output_push.yaml new file mode 100644 index 00000000000..6461c40061c --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_no_expr_output_push.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(age=[$10]) + LogicalSort(sort0=[$19], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[SORT_EXPR->[+($10, $7) ASCENDING NULLS_FIRST], LIMIT->10000, PROJECT->[age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydADFewogICJvcCI6IHsKICAgICJuYW1lIjogIisiLAogICAgImtpbmQiOiAiUExVUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAxLAogICAgICAibmFtZSI6ICIkMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAJ0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QAA2FnZX5xAH4ACnQAB0lOVEVHRVJ4eA==\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_project_then_sort.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_project_then_sort.yaml new file mode 100644 index 00000000000..256c393b069 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_project_then_sort.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$1], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) + LogicalProject(age=[$10], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[+($t0, $t1)], age=[$t0], $f1=[$t2]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydADFewogICJvcCI6IHsKICAgICJuYW1lIjogIisiLAogICAgImtpbmQiOiAiUExVUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAxLAogICAgICAibmFtZSI6ICIkMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAJ0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QAA2FnZX5xAH4ACnQAB0lOVEVHRVJ4eA==\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_push.yaml new file mode 100644 index 00000000000..b509fb3117f --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_push.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$1], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(age=[$10], age2=[$19]) + LogicalSort(sort0=[$19], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[+($t0, $t1)], age=[$t0], $f1=[$t2]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydADFewogICJvcCI6IHsKICAgICJuYW1lIjogIisiLAogICAgImtpbmQiOiAiUExVUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAxLAogICAgICAibmFtZSI6ICIkMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAJ0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QAA2FnZX5xAH4ACnQAB0lOVEVHRVJ4eA==\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_single_expr_output_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_single_expr_output_push.yaml new file mode 100644 index 00000000000..db832db7b4d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_single_expr_output_push.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(age2=[$19]) + LogicalSort(sort0=[$19], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[+($t0, $t1)], $f0=[$t2]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydADFewogICJvcCI6IHsKICAgICJuYW1lIjogIisiLAogICAgImtpbmQiOiAiUExVUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAxLAogICAgICAibmFtZSI6ICIkMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAJ0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QAA2FnZX5xAH4ACnQAB0lOVEVHRVJ4eA==\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_nested_expr.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_nested_expr.yaml new file mode 100644 index 00000000000..b57328afbae --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_nested_expr.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$14], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], age2=[$19], age3=[$20]) + LogicalSort(sort0=[$20], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)], age3=[-(+($10, $7), $10)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..12=[{inputs}], expr#13=[+($t10, $t7)], expr#14=[-($t13, $t10)], proj#0..14=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT_EXPR->[-(+($10, $7), $10) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydAHbewogICJvcCI6IHsKICAgICJuYW1lIjogIi0iLAogICAgImtpbmQiOiAiTUlOVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICIrIiwKICAgICAgICAia2luZCI6ICJQTFVTIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDEsCiAgICAgICAgICAibmFtZSI6ICIkMSIKICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJpbnB1dCI6IDAsCiAgICAgICJuYW1lIjogIiQwIgogICAgfQogIF0sCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgIm51bGxhYmxlIjogdHJ1ZQogIH0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAnQAB2JhbGFuY2V+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AARMT05HdAADYWdlfnEAfgAKdAAHSU5URUdFUnh4\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_then_field_sort.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_then_field_sort.yaml new file mode 100644 index 00000000000..d9726a2beb1 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_then_field_sort.yaml @@ -0,0 +1,17 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$10], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], age2=[$19], balance2=[$20]) + LogicalSort(sort0=[$10], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[$19], balance2=[ABS($7)]) + LogicalSort(sort0=[$19], sort1=[$10], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..12=[{inputs}], expr#13=[+($t10, $t7)], expr#14=[ABS($t7)], proj#0..14=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT->[{ + "age" : { + "order" : "asc", + "missing" : "_first" + } + }], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"age":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_complex_and_simple_expr.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_complex_and_simple_expr.yaml new file mode 100644 index 00000000000..ee2254f5420 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_complex_and_simple_expr.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$13], sort1=[$14], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], age2=[$19], balance2=[$20]) + LogicalSort(sort0=[$19], sort1=[$20], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)], balance2=[+($7, 1)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..12=[{inputs}], expr#13=[+($t10, $t7)], expr#14=[1], expr#15=[+($t7, $t14)], proj#0..13=[{exprs}], $f14=[$t15]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT_EXPR->[+($10, $7) ASCENDING NULLS_FIRST, balance ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydADFewogICJvcCI6IHsKICAgICJuYW1lIjogIisiLAogICAgImtpbmQiOiAiUExVUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAxLAogICAgICAibmFtZSI6ICIkMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAJ0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QAA2FnZX5xAH4ACnQAB0lOVEVHRVJ4eA==\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}},{"balance":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_type_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_type_push.json index 87801c949fc..054bc203640 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_type_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_type_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age=[$8])\n LogicalSort(sort0=[$17], dir0=[ASC-nulls-first])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], $f17=[SAFE_CAST($8)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableCalc(expr#0..1=[{inputs}], age=[$t0])\n EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$1], dir0=[ASC-nulls-first])\n EnumerableCalc(expr#0=[{inputs}], expr#1=[SAFE_CAST($t0)], proj#0..1=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], SORT_EXPR->[SAFE_CAST($0) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"_script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAensKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhZ2UiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQA3XsKICAib3AiOiB7CiAgICAibmFtZSI6ICJTQUZFX0NBU1QiLAogICAgImtpbmQiOiAiU0FGRV9DQVNUIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9CiAgXSwKICAidHlwZSI6IHsKICAgICJ0eXBlIjogIkRPVUJMRSIsCiAgICAibnVsbGFibGUiOiB0cnVlCiAgfQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAABdAADYWdlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3h4\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"NULL_DIRECTION\":\"FIRST\",\"DIRECTION\":\"ASCENDING\",\"utcTimestamp\":*}},\"type\":\"number\",\"order\":\"asc\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_no_expr_output_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_no_expr_output_push.yaml new file mode 100644 index 00000000000..5c479c6867e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_no_expr_output_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(age=[$10]) + LogicalSort(sort0=[$19], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..1=[{inputs}], age=[$t0]) + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$1], dir0=[ASC-nulls-first]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[+($t10, $t7)], age=[$t10], age2=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_project_then_sort.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_project_then_sort.yaml new file mode 100644 index 00000000000..a95c277b40e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_project_then_sort.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$1], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) + LogicalProject(age=[$10], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$1], dir0=[ASC-nulls-first]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[+($t10, $t7)], age=[$t10], age2=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_push.yaml new file mode 100644 index 00000000000..ef4ea5fc43e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_push.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$1], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(age=[$10], age2=[$19]) + LogicalSort(sort0=[$19], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$1], dir0=[ASC-nulls-first]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[+($t10, $t7)], age=[$t10], age2=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_single_expr_output_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_single_expr_output_push.yaml new file mode 100644 index 00000000000..7df4a4d7f4e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_single_expr_output_push.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(age2=[$19]) + LogicalSort(sort0=[$19], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], dir0=[ASC-nulls-first]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[+($t10, $t7)], age2=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_nested_expr.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_nested_expr.yaml new file mode 100644 index 00000000000..711608264eb --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_nested_expr.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$14], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], age2=[$19], age3=[$20]) + LogicalSort(sort0=[$20], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)], age3=[-(+($10, $7), $10)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$14], dir0=[ASC-nulls-first]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[+($t10, $t7)], expr#20=[-($t19, $t10)], proj#0..12=[{exprs}], age2=[$t19], age3=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_then_field_sort.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_then_field_sort.yaml new file mode 100644 index 00000000000..362f847ae6e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_then_field_sort.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$10], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], age2=[$19], balance2=[$20]) + LogicalSort(sort0=[$10], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[$19], balance2=[ABS($7)]) + LogicalSort(sort0=[$19], sort1=[$10], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..13=[{inputs}], expr#14=[ABS($t7)], proj#0..14=[{exprs}]) + EnumerableSort(sort0=[$10], dir0=[ASC-nulls-first]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[+($t10, $t7)], proj#0..12=[{exprs}], age2=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sort_complex_and_simple_expr.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sort_complex_and_simple_expr.yaml new file mode 100644 index 00000000000..873a778f979 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sort_complex_and_simple_expr.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$13], sort1=[$14], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], age2=[$19], balance2=[$20]) + LogicalSort(sort0=[$19], sort1=[$20], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)], balance2=[+($7, 1)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$13], sort1=[$14], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[+($t10, $t7)], expr#20=[1], expr#21=[+($t7, $t20)], proj#0..12=[{exprs}], age2=[$t19], balance2=[$t21]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java index 204ecacbd39..a09be09a34b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java @@ -5,22 +5,27 @@ package org.opensearch.sql.opensearch.planner.rules; +import java.util.HashMap; +import java.util.Map; import java.util.Optional; +import java.util.function.Predicate; import org.apache.calcite.adapter.enumerable.EnumerableProject; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelRule; import org.apache.calcite.plan.RelTrait; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.plan.volcano.AbstractConverter; +import org.apache.calcite.plan.volcano.RelSubset; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollationTraitDef; import org.apache.calcite.rel.RelFieldCollation; -import org.apache.calcite.rel.RelFieldCollation.Direction; +import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Project; import org.apache.commons.lang3.tuple.Pair; import org.immutables.value.Value; import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; import org.opensearch.sql.calcite.utils.PlanUtils; +import org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan; import org.opensearch.sql.opensearch.util.OpenSearchRelOptUtil; /** @@ -29,6 +34,9 @@ * takes effect, the input collation is changed to a sort over field instead of original sort over * expression. It changes the collation requirement of the whole query. * + *

Another problem is if sort expression is pushed down to scan, the Enumerable project doesn't + * know the collation is already satisfied. + * *

AbstractConverter physical node is supposed to resolve the problem of inconsistent collation * requirement between physical node input and output. This optimization rule finds equivalent * output expression collations and input field collations. If their collation traits are satisfied, @@ -48,52 +56,136 @@ public void onMatch(RelOptRuleCall call) { final Project project = call.rel(1); final RelTraitSet toTraits = converter.getTraitSet(); final RelCollation toCollation = toTraits.getTrait(RelCollationTraitDef.INSTANCE); - final RelTrait fromTrait = - project.getInput().getTraitSet().getTrait(RelCollationTraitDef.INSTANCE); + + assert toCollation != null && toCollation.getFieldCollations() != null + : "Output field collations should not be null"; + + Map>> orderEquivInfoMap = new HashMap<>(); + for (RelFieldCollation relFieldCollation : toCollation.getFieldCollations()) { + orderEquivInfoMap.put( + relFieldCollation.getFieldIndex(), + OpenSearchRelOptUtil.getOrderEquivalentInputInfo( + project.getProjects().get(relFieldCollation.getFieldIndex()))); + } + + // Branch 1: Check if complex expressions are already sorted by scan and assign collation + if (handleComplexExpressionsSortedByScan( + call, project, toTraits, toCollation, orderEquivInfoMap)) { + return; + } + + // Branch 2: Handle simple expressions that can be transformed to field sorts + handleSimpleExpressionFieldSorts(call, project, toTraits, toCollation, orderEquivInfoMap); + } + + /** + * Handle the case where complex expressions are already sorted by the scan. In this case, we can + * directly assign toTrait to the new EnumerableProject. + * + * @return true if handled, false if not applicable + */ + private boolean handleComplexExpressionsSortedByScan( + RelOptRuleCall call, + Project project, + RelTraitSet toTraits, + RelCollation toCollation, + Map>> orderEquivInfoMap) { + + // Check if toCollation is null or not a simple RelCollation with field collations + if (toCollation == null || toCollation.getFieldCollations().isEmpty()) { + return false; + } + + // Extract the actual enumerable scan from the input, handling RelSubset case + CalciteEnumerableIndexScan scan = extractEnumerableScanFromInput(project.getInput()); + if (scan == null) { + return false; + } + + // Check if the scan can provide the required sort collation + if (OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, toCollation, orderEquivInfoMap)) { + // The scan has already provided the sorting for complex expressions + // We can directly assign toTrait to new EnumerableProject + Project newProject = + project.copy(toTraits, project.getInput(), project.getProjects(), project.getRowType()); + call.transformTo(newProject); + return true; + } + return false; + } + + /** + * Handle simple expressions that can be transformed to field sorts using + * getOrderEquivalentInputInfo. + */ + private void handleSimpleExpressionFieldSorts( + RelOptRuleCall call, + Project project, + RelTraitSet toTraits, + RelCollation toCollation, + Map>> orderEquivInfoMap) { + + RelTrait fromTrait = project.getInput().getTraitSet().getTrait(RelCollationTraitDef.INSTANCE); + // In case of fromTrait is an instance of RelCompositeTrait, it most likely finds equivalence by // default. // Let it go through default ExpandConversionRule to determine trait satisfaction. - if (fromTrait != null && fromTrait instanceof RelCollation) { + if (fromTrait instanceof RelCollation) { RelCollation fromCollation = (RelCollation) fromTrait; // TODO: Handle the case where multi expr collations are mapped to the same source field if (toCollation == null || toCollation.getFieldCollations().isEmpty() - || fromCollation == null || fromCollation.getFieldCollations().size() < toCollation.getFieldCollations().size()) { return; } for (int i = 0; i < toCollation.getFieldCollations().size(); i++) { - RelFieldCollation targetFieldCollation = toCollation.getFieldCollations().get(i); - Optional> equivalentCollationInputInfo = - OpenSearchRelOptUtil.getOrderEquivalentInputInfo( - project.getProjects().get(targetFieldCollation.getFieldIndex())); - - if (equivalentCollationInputInfo.isEmpty()) { - return; - } - - RelFieldCollation sourceFieldCollation = fromCollation.getFieldCollations().get(i); - int equivalentSourceIndex = equivalentCollationInputInfo.get().getLeft(); - Direction equivalentSourceDirection = - equivalentCollationInputInfo.get().getRight() - ? targetFieldCollation.getDirection().reverse() - : targetFieldCollation.getDirection(); - if (!(equivalentSourceIndex == sourceFieldCollation.getFieldIndex() - && equivalentSourceDirection == sourceFieldCollation.getDirection())) { + RelFieldCollation toCollationFieldCollation = toCollation.getFieldCollations().get(i); + if (!OpenSearchRelOptUtil.sourceCollationSatisfiesTargetCollation( + fromCollation.getFieldCollations().get(i), + toCollationFieldCollation, + orderEquivInfoMap.get(toCollationFieldCollation.getFieldIndex()))) { return; } } // After collation equivalence analysis, fromTrait satisfies toTrait. Copy the target trait - // set - // to new EnumerableProject. + // set to new EnumerableProject. Project newProject = project.copy(toTraits, project.getInput(), project.getProjects(), project.getRowType()); call.transformTo(newProject); } } + /** + * Extract CalciteEnumerableIndexScan from the input RelNode, handling RelSubset case. Since this + * rule matches EnumerableProject, we expect CalciteEnumerableIndexScan during physical + * optimization. + * + * @param input The input RelNode to extract scan from + * @return CalciteEnumerableIndexScan if found, null otherwise + */ + private static CalciteEnumerableIndexScan extractEnumerableScanFromInput(RelNode input) { + + // Case 1: Direct CalciteEnumerableIndexScan (physical scan) + if (input instanceof CalciteEnumerableIndexScan) { + return (CalciteEnumerableIndexScan) input; + } + + // Case 2: RelSubset with best plan being a CalciteEnumerableIndexScan + if (input instanceof RelSubset) { + RelSubset subset = (RelSubset) input; + RelNode bestPlan = subset.getBest(); + if (bestPlan != null) { + // Recursively check the best plan + return extractEnumerableScanFromInput(bestPlan); + } + } + + return null; + } + @Value.Immutable public interface Config extends OpenSearchRuleConfig { @@ -110,8 +202,9 @@ public interface Config extends OpenSearchRuleConfig { .oneInput( b1 -> b1.operand(EnumerableProject.class) - .predicate(PlanUtils::projectContainsExpr) - .predicate(p -> !p.containsOver()) + .predicate( + Predicate.not(Project::containsOver) + .and(PlanUtils::projectContainsExpr)) .anyInputs())); @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/LimitIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/LimitIndexScanRule.java index 5d5412ce86d..ce99431fa8b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/LimitIndexScanRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/LimitIndexScanRule.java @@ -50,7 +50,7 @@ public void onMatch(RelOptRuleCall call) { } } - private static Integer extractLimitValue(RexNode fetch) { + public static Integer extractLimitValue(RexNode fetch) { // fetch is always a integer literal (specified in our PPL/SQL syntax) if (fetch instanceof RexLiteral) { return ((RexLiteral) fetch).getValueAs(Integer.class); @@ -71,7 +71,7 @@ private static Integer extractLimitValue(RexNode fetch) { * @param offset The RexNode representing the offset. * @return The extracted offset value, or null if it cannot be determined. */ - private static Integer extractOffsetValue(RexNode offset) { + public static Integer extractOffsetValue(RexNode offset) { if (Objects.isNull(offset)) { return 0; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java index c7f007bbf49..42262097333 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java @@ -35,6 +35,8 @@ public class OpenSearchIndexRules { SortAggregateMeasureRule.Config.DEFAULT.toRule(); private static final RareTopPushdownRule RARE_TOP_PUSH_DOWN = RareTopPushdownRule.Config.DEFAULT.toRule(); + private static final SortExprIndexScanRule SORT_EXPR_INDEX_SCAN = + SortExprIndexScanRule.Config.DEFAULT.toRule(); // Rule that always pushes down relevance functions regardless of pushdown settings public static final RelevanceFunctionPushdownRule RELEVANCE_FUNCTION_PUSHDOWN = @@ -54,7 +56,8 @@ public class OpenSearchIndexRules { SORT_PROJECT_EXPR_TRANSPOSE, SORT_AGGREGATION_METRICS_RULE, RARE_TOP_PUSH_DOWN, - EXPAND_COLLATION_ON_PROJECT_EXPR); + EXPAND_COLLATION_ON_PROJECT_EXPR, + SORT_EXPR_INDEX_SCAN); // prevent instantiation private OpenSearchIndexRules() {} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortExprIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortExprIndexScanRule.java new file mode 100644 index 00000000000..557eb3ce46e --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortExprIndexScanRule.java @@ -0,0 +1,261 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.planner.rules; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.function.Predicate; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelRule; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelFieldCollation.Direction; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.logical.LogicalSort; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.immutables.value.Value; +import org.opensearch.sql.calcite.utils.PlanUtils; +import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; +import org.opensearch.sql.opensearch.storage.scan.CalciteLogicalIndexScan; +import org.opensearch.sql.opensearch.storage.scan.context.SortExprDigest; +import org.opensearch.sql.opensearch.util.OpenSearchRelOptUtil; + +/** + * Rule to match sort-project-scan RelNode pattern and push down sort expressions to OpenSearch. + * This rule identifies sort operations with complex expressions and attempts to push them down to + * the OpenSearch level for better performance. + */ +@Value.Enclosing +public class SortExprIndexScanRule extends RelRule { + + protected SortExprIndexScanRule(SortExprIndexScanRule.Config config) { + super(config); + } + + @Override + public void onMatch(RelOptRuleCall call) { + final LogicalSort sort = call.rel(0); + final LogicalProject project = call.rel(1); + final CalciteLogicalIndexScan scan = call.rel(2); + + // Only match sort - project - scan when any sort key references an expression + if (!PlanUtils.sortReferencesExpr(sort, project)) { + return; + } + + boolean allSimpleExprs = true; + Map>> orderEquivInfoMap = new HashMap<>(); + + for (RelFieldCollation relFieldCollation : sort.getCollation().getFieldCollations()) { + Optional> orderEquivInfo = + OpenSearchRelOptUtil.getOrderEquivalentInputInfo( + project.getProjects().get(relFieldCollation.getFieldIndex())); + orderEquivInfoMap.put(relFieldCollation.getFieldIndex(), orderEquivInfo); + if (allSimpleExprs && orderEquivInfo.isEmpty()) { + allSimpleExprs = false; + } + } + + if (allSimpleExprs) { + return; + } + + boolean scanProvidesRequiredCollation = + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, sort.collation, orderEquivInfoMap); + if (scan.isTopKPushed() && !scanProvidesRequiredCollation) { + return; + } + + // Extract sort expressions with collation information from the sort node + List sortExprDigests = + extractSortExpressionInfos(sort, project, scan, orderEquivInfoMap); + + // Check if any sort expressions can be pushed down + if (sortExprDigests.isEmpty() || !canPushDownSortExpressionInfos(sortExprDigests)) { + return; + } + + CalciteLogicalIndexScan newScan; + // If the scan's sort info already satisfies new sort, just pushdown limit if there is + if (scan.isTopKPushed() && scanProvidesRequiredCollation) { + newScan = scan.copy(); + } else { + // Attempt to push down sort expressions + newScan = scan.pushdownSortExpr(sortExprDigests); + } + + Integer limitValue = LimitIndexScanRule.extractLimitValue(sort.fetch); + Integer offsetValue = LimitIndexScanRule.extractOffsetValue(sort.offset); + if (newScan != null && limitValue != null && offsetValue != null) { + newScan = (CalciteLogicalIndexScan) newScan.pushDownLimit(sort, limitValue, offsetValue); + } + + if (newScan != null) { + Project newProject = + project.copy(sort.getTraitSet(), newScan, project.getProjects(), project.getRowType()); + call.transformTo(newProject); + } + } + + /** + * Extract sort expressions with collation information from the sort node, mapping them through + * the project if necessary. + * + * @param sort The sort node + * @param project The project node + * @param scan The scan node to get stable field references + * @param orderEquivInfoMap Order equivalence info to determine if output expression collation can + * be optimized to field collation + * @return List of SortExprDigest with stable field references or complex expressions + */ + private List extractSortExpressionInfos( + Sort sort, + Project project, + CalciteLogicalIndexScan scan, + Map>> orderEquivInfoMap) { + List sortExprDigests = new ArrayList<>(); + + List sortKeys = sort.getSortExps(); + List collations = sort.getCollation().getFieldCollations(); + + for (int i = 0; i < sortKeys.size(); i++) { + RexNode sortKey = sortKeys.get(i); + RelFieldCollation collation = collations.get(i); + + SortExprDigest info = mapThroughProject(sortKey, project, scan, collation, orderEquivInfoMap); + + if (info != null) { + sortExprDigests.add(info); + } + } + + return sortExprDigests; + } + + /** + * Map a sort key through the project to create a SortExprDigest. For simple field references, + * stores the field name for stability. For complex expressions, stores the RexNode. + * + * @param sortKey The sort key (usually a RexInputRef) + * @param project The project node + * @param scan The scan node to get field names from + * @param collation The collation information + * @param orderEquivInfoMap Order equivalence info to determine if output expression collation can + * be optimized to field collation + * @return SortExprDigest with stable field reference or complex expression + */ + private SortExprDigest mapThroughProject( + RexNode sortKey, + Project project, + CalciteLogicalIndexScan scan, + RelFieldCollation collation, + Map>> orderEquivInfoMap) { + assert sortKey instanceof RexInputRef : "sort key should be always RexInputRef"; + + RexInputRef inputRef = (RexInputRef) sortKey; + RexNode projectExpression = project.getProjects().get(inputRef.getIndex()); + // Get the field name from the scan's row type + List scanFieldNames = scan.getRowType().getFieldNames(); + + // If the project expression is a simple RexInputRef pointing to a scan field, + // or it can be optimized to sort by field, + // store the field name for stability + Optional> orderEquivalentInfo = + orderEquivInfoMap.get(collation.getFieldIndex()); + if (orderEquivalentInfo.isPresent()) { + Direction equivalentDirection = + orderEquivalentInfo.get().getRight() + ? collation.getDirection().reverse() + : collation.getDirection(); + // Create SortExprDigest with field name (stable reference) + return new SortExprDigest( + scanFieldNames.get(orderEquivalentInfo.get().getLeft()), + equivalentDirection, + collation.nullDirection); + } + + // For complex expressions, store the RexNode + return new SortExprDigest(projectExpression, collation.getDirection(), collation.nullDirection); + } + + /** + * Check if sort expressions can be pushed down to OpenSearch. Rejects literals and expressions + * that only contain literals. Only supports number and string types for sort scripts. + * + * @param sortExprDigests List of sort expression infos to check + * @return true if expressions can be pushed down, false otherwise + */ + private boolean canPushDownSortExpressionInfos(List sortExprDigests) { + for (SortExprDigest info : sortExprDigests) { + RexNode expr = info.getExpression(); + if (expr == null && StringUtils.isEmpty(info.getFieldName())) { + return false; + } else if (info.isSimpleFieldReference()) { + continue; + } + // Reject literals or constant expression - they don't provide meaningful sorting + if (expr instanceof RexLiteral + || RexUtil.isConstant(expr) + || !isSupportedSortScriptType(expr.getType().getSqlTypeName())) { + return false; + } + } + return true; + } + + /** + * Check if the SQL type is supported for OpenSearch sort scripts. Only number and string types + * are supported for sort script. + * + * @param sqlTypeName The SQL type name to check + * @return true if the type is supported for sort scripts, false otherwise + */ + private boolean isSupportedSortScriptType(SqlTypeName sqlTypeName) { + return SqlTypeName.CHAR_TYPES.contains(sqlTypeName) + || SqlTypeName.APPROX_TYPES.contains(sqlTypeName) + || SqlTypeName.INT_TYPES.contains(sqlTypeName); + } + + /** Rule configuration. */ + @Value.Immutable + public interface Config extends RelRule.Config { + SortExprIndexScanRule.Config DEFAULT = + ImmutableSortExprIndexScanRule.Config.builder() + .build() + .withOperandSupplier( + b0 -> + b0.operand(LogicalSort.class) + // Pure limit pushdown should be covered by SortProjectTransposeRule and + // OpenSearchLimitIndexScanRule + .predicate(sort -> !sort.collation.getFieldCollations().isEmpty()) + .oneInput( + b1 -> + b1.operand(LogicalProject.class) + .predicate(Predicate.not(Project::containsOver)) + .oneInput( + b2 -> + b2.operand(CalciteLogicalIndexScan.class) + .predicate( + AbstractCalciteIndexScan::noAggregatePushed) + .noInputs()))); + + @Override + default SortExprIndexScanRule toRule() { + return new SortExprIndexScanRule(this); + } + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index bd373c4b4c5..b4bf48bd880 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -151,7 +151,8 @@ else if (node instanceof RexInputRef ref) { .getReferenceForTermQuery()); } else if (node instanceof RexCall || node instanceof RexLiteral) { return scriptBuilder.apply( - (new PredicateAnalyzer.ScriptQueryExpression(node, rowType, fieldTypes, cluster)) + (new PredicateAnalyzer.ScriptQueryExpression( + node, rowType, fieldTypes, cluster, Collections.emptyMap())) .getScript()); } throw new IllegalStateException( diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index 3416515b08a..7a0a18c79ac 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -225,7 +225,18 @@ public void pushDownSort(List> sortBuilders) { } } - /** Pushdown size (limit) and from (offset) to DSL request. */ + /** + * Push down sort builder suppliers to DSL request. + * + * @param sortBuilderSuppliers a mixed of field sort builder suppliers and script sort builder + * suppliers + */ + public void pushDownSortSuppliers(List>> sortBuilderSuppliers) { + for (Supplier> sortBuilderSupplier : sortBuilderSuppliers) { + sourceBuilder.sort(sortBuilderSupplier.get()); + } + } + public void pushDownLimit(Integer limit, Integer offset) { // If there are multiple limit, we take the minimum among them // E.g. for `source=t | head 10 | head 5`, we take 5 diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 59051ca6ef0..94134ae0b4b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -52,6 +52,7 @@ import java.util.Collections; import java.util.GregorianCalendar; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; @@ -222,7 +223,8 @@ public static QueryExpression analyzeExpression( throw new ExpressionNotAnalyzableException("Can't convert " + expression, e); } try { - return new ScriptQueryExpression(expression, rowType, fieldTypes, cluster); + return new ScriptQueryExpression( + expression, rowType, fieldTypes, cluster, Collections.emptyMap()); } catch (Throwable e2) { throw new ExpressionNotAnalyzableException("Can't convert " + expression, e2); } @@ -794,7 +796,8 @@ public Expression tryAnalyzeOperand(RexNode node) { return qe; } catch (PredicateAnalyzerException firstFailed) { try { - QueryExpression qe = new ScriptQueryExpression(node, rowType, fieldTypes, cluster); + QueryExpression qe = + new ScriptQueryExpression(node, rowType, fieldTypes, cluster, Collections.emptyMap()); if (!qe.isPartial()) { qe.updateAnalyzedNodes(node); } @@ -1448,12 +1451,14 @@ public static class ScriptQueryExpression extends QueryExpression { private RexNode analyzedNode; // use lambda to generate code lazily to avoid store generated code private final Supplier codeGenerator; + private final Map params; public ScriptQueryExpression( RexNode rexNode, RelDataType rowType, Map fieldTypes, - RelOptCluster cluster) { + RelOptCluster cluster, + Map params) { // We prevent is_null(nested_field) from being pushed down because pushed-down scripts can not // access nested fields for the time being if (rexNode instanceof RexCall @@ -1467,6 +1472,7 @@ public ScriptQueryExpression( () -> SerializationWrapper.wrapWithLangType( ScriptEngineType.CALCITE, serializer.serialize(rexNode, rowType, fieldTypes)); + this.params = params; } @Override @@ -1480,12 +1486,14 @@ public Script getScript() { throw new UnsupportedScriptException( "ScriptQueryExpression requires a valid current time from hook, but it is not set"); } + Map mergedParams = new LinkedHashMap<>(params); + mergedParams.put(Variable.UTC_TIMESTAMP.camelName, currentTime); return new Script( DEFAULT_SCRIPT_TYPE, COMPOUNDED_LANG_NAME, codeGenerator.get(), Collections.emptyMap(), - Map.of(Variable.UTC_TIMESTAMP.camelName, currentTime)); + mergedParams); } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index 97ce0592c48..3f6b53b6c59 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -56,6 +56,7 @@ import org.opensearch.sql.opensearch.storage.scan.context.PushDownOperation; import org.opensearch.sql.opensearch.storage.scan.context.PushDownType; import org.opensearch.sql.opensearch.storage.scan.context.RareTopDigest; +import org.opensearch.sql.opensearch.storage.scan.context.SortExprDigest; /** An abstract relational operator representing a scan of an OpenSearchIndex type. */ @Getter @@ -115,7 +116,7 @@ public double estimateRowCount(RelMetadataQuery mq) { (rowCount, operation) -> switch (operation.type()) { case AGGREGATION -> mq.getRowCount((RelNode) operation.digest()); - case PROJECT, SORT -> rowCount; + case PROJECT, SORT, SORT_EXPR -> rowCount; case SORT_AGG_METRICS -> NumberUtil.min( rowCount, osIndex.getBucketSize().doubleValue()); // Refer the org.apache.calcite.rel.metadata.RelMdRowCount @@ -166,6 +167,13 @@ public double estimateRowCount(RelMetadataQuery mq) { case SORT_AGG_METRICS -> { dRows = dRows * .9 / 10; // *.9 because always bucket IS_NOT_NULL dCpu += dRows; + } + case SORT_EXPR -> { + @SuppressWarnings("unchecked") + List sortKeys = (List) operation.digest(); + long complexExprCount = + sortKeys.stream().filter(digest -> digest.getExpression() != null).count(); + dCpu += NumberUtil.multiply(dRows, 1.1 * complexExprCount); } // Refer the org.apache.calcite.rel.metadata.RelMdRowCount.getRowCount(Aggregate rel,...) case COLLAPSE -> { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index 3d01c81aeae..073a4f1f29f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -6,9 +6,12 @@ package org.opensearch.sql.opensearch.storage.scan; import com.google.common.collect.ImmutableList; +import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.function.Supplier; import java.util.stream.Collectors; import lombok.Getter; import org.apache.calcite.plan.Convention; @@ -21,6 +24,7 @@ import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelFieldCollation.Direction; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Project; @@ -31,15 +35,22 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeName; import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; +import org.opensearch.search.sort.ScriptSortBuilder.ScriptSortType; +import org.opensearch.search.sort.SortBuilder; +import org.opensearch.search.sort.SortBuilders; +import org.opensearch.search.sort.SortOrder; import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; @@ -48,6 +59,7 @@ import org.opensearch.sql.opensearch.planner.rules.EnumerableIndexScanRule; import org.opensearch.sql.opensearch.planner.rules.OpenSearchIndexRules; import org.opensearch.sql.opensearch.request.AggregateAnalyzer; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder.PushDownUnSupportedException; import org.opensearch.sql.opensearch.request.PredicateAnalyzer; import org.opensearch.sql.opensearch.request.PredicateAnalyzer.QueryExpression; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; @@ -61,6 +73,7 @@ import org.opensearch.sql.opensearch.storage.scan.context.PushDownContext; import org.opensearch.sql.opensearch.storage.scan.context.PushDownType; import org.opensearch.sql.opensearch.storage.scan.context.RareTopDigest; +import org.opensearch.sql.opensearch.storage.scan.context.SortExprDigest; /** The logical relational operator representing a scan of an OpenSearchIndex type. */ @Getter @@ -124,7 +137,7 @@ public CalciteLogicalIndexScan copyWithNewTraitSet(RelTraitSet traitSet) { public void register(RelOptPlanner planner) { super.register(planner); planner.addRule(EnumerableIndexScanRule.DEFAULT_CONFIG.toRule()); - if (osIndex.getSettings().getSettingValue(Settings.Key.CALCITE_PUSHDOWN_ENABLED)) { + if ((Boolean) osIndex.getSettings().getSettingValue(Settings.Key.CALCITE_PUSHDOWN_ENABLED)) { // When pushdown is enabled, use normal rules (they handle everything including relevance // functions) for (RelOptRule rule : OpenSearchIndexRules.OPEN_SEARCH_INDEX_SCAN_RULES) { @@ -253,7 +266,7 @@ public CalciteLogicalIndexScan pushDownProject(List selectedColumns) { newSchema, pushDownContext.clone()); - AbstractAction action; + AbstractAction action; if (pushDownContext.isAggregatePushed()) { // For aggregate, we do nothing on query builder but only change the schema of the scan. action = (AggregationBuilderAction) aggAction -> {}; @@ -425,4 +438,95 @@ public AbstractRelNode pushDownLimit(LogicalSort sort, Integer limit, Integer of } return null; } + + /** + * Push down sort expressions to OpenSearch level. Supports mixed RexCall and field sort + * expressions. + * + * @param sortExprDigests List of SortExprDigest with expressions and collation information + * @return CalciteLogicalIndexScan with sort expressions pushed down, or null if pushdown fails + */ + public CalciteLogicalIndexScan pushdownSortExpr(List sortExprDigests) { + try { + if (sortExprDigests == null || sortExprDigests.isEmpty()) { + return null; + } + + CalciteLogicalIndexScan newScan = + new CalciteLogicalIndexScan( + getCluster(), + traitSet, + hints, + table, + osIndex, + getRowType(), + pushDownContext.cloneWithoutSort()); + + List>> sortBuilderSuppliers = new ArrayList<>(); + for (SortExprDigest digest : sortExprDigests) { + SortOrder order = + Direction.DESCENDING.equals(digest.getDirection()) ? SortOrder.DESC : SortOrder.ASC; + + if (digest.isSimpleFieldReference()) { + String missing = + switch (digest.getNullDirection()) { + case FIRST -> "_first"; + case LAST -> "_last"; + default -> null; + }; + sortBuilderSuppliers.add( + () -> SortBuilders.fieldSort(digest.getFieldName()).order(order).missing(missing)); + continue; + } + RexNode sortExpr = digest.getExpression(); + assert sortExpr instanceof RexCall : "sort expression should be RexCall"; + Map directionParams = new LinkedHashMap<>(); + directionParams.put(PlanUtils.NULL_DIRECTION, digest.getNullDirection().name()); + directionParams.put(PlanUtils.DIRECTION, digest.getDirection().name()); + // Complex expression - use ScriptQueryExpression to generate script for sort + PredicateAnalyzer.ScriptQueryExpression scriptExpr = + new PredicateAnalyzer.ScriptQueryExpression( + digest.getExpression(), + rowType, + osIndex.getAllFieldTypes(), + getCluster(), + directionParams); + // Determine the correct ScriptSortType based on the expression's return type + ScriptSortType sortType = getScriptSortType(sortExpr.getType()); + + sortBuilderSuppliers.add( + () -> SortBuilders.scriptSort(scriptExpr.getScript(), sortType).order(order)); + } + + // Create action to push down sort expressions to OpenSearch + OSRequestBuilderAction action = + requestBuilder -> requestBuilder.pushDownSortSuppliers(sortBuilderSuppliers); + + newScan.pushDownContext.add(PushDownType.SORT_EXPR, sortExprDigests, action); + return newScan; + } catch (Exception e) { + if (LOG.isDebugEnabled()) { + LOG.debug("Cannot pushdown sort expressions: {}", sortExprDigests, e); + } + } + return null; + } + + /** + * Determine the appropriate ScriptSortType based on the expression's return type. + * + * @param relDataType the return type of the expression + * @return the appropriate ScriptSortType + */ + private ScriptSortType getScriptSortType(RelDataType relDataType) { + if (SqlTypeName.CHAR_TYPES.contains(relDataType.getSqlTypeName())) { + return ScriptSortType.STRING; + } else if (SqlTypeName.INT_TYPES.contains(relDataType.getSqlTypeName()) + || SqlTypeName.APPROX_TYPES.contains(relDataType.getSqlTypeName())) { + return ScriptSortType.NUMBER; + } else { + throw new PushDownUnSupportedException( + "Unsupported type for sort expression pushdown: " + relDataType); + } + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java index 9098d1ca17c..4a2ade440cf 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java @@ -30,6 +30,7 @@ public class PushDownContext extends AbstractCollection { private boolean isProjectPushed = false; private boolean isMeasureOrderPushed = false; private boolean isSortPushed = false; + private boolean isSortExprPushed = false; private boolean isTopKPushed = false; private boolean isRareTopPushed = false; @@ -53,7 +54,7 @@ public PushDownContext clone() { public PushDownContext cloneWithoutSort() { PushDownContext newContext = new PushDownContext(osIndex); for (PushDownOperation action : this) { - if (action.type() != PushDownType.SORT) { + if (action.type() != PushDownType.SORT && action.type() != PushDownType.SORT_EXPR) { newContext.add(action); } } @@ -101,7 +102,7 @@ public boolean add(PushDownOperation operation) { } if (operation.type() == PushDownType.LIMIT) { isLimitPushed = true; - if (isSortPushed || isMeasureOrderPushed) { + if (isSortPushed || isMeasureOrderPushed || isSortExprPushed) { isTopKPushed = true; } } @@ -111,6 +112,9 @@ public boolean add(PushDownOperation operation) { if (operation.type() == PushDownType.SORT) { isSortPushed = true; } + if (operation.type() == PushDownType.SORT_EXPR) { + isSortExprPushed = true; + } if (operation.type() == PushDownType.SORT_AGG_METRICS) { isMeasureOrderPushed = true; } @@ -128,6 +132,20 @@ public boolean containsDigest(Object digest) { return this.stream().anyMatch(action -> action.digest().equals(digest)); } + /** + * Get the digest of the first operation of a specific type. + * + * @param type The PushDownType to get the digest for + * @return The digest object, or null if no operation of the specified type exists + */ + public Object getDigestByType(PushDownType type) { + return this.stream() + .filter(operation -> operation.type() == type) + .map(PushDownOperation::digest) + .findFirst() + .orElse(null); + } + public OpenSearchRequestBuilder createRequestBuilder() { OpenSearchRequestBuilder newRequestBuilder = osIndex.createRequestBuilder(); if (operationsForRequestBuilder != null) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java index ddb0a3d7e66..81927e9f8d6 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java @@ -16,6 +16,7 @@ public enum PushDownType { COLLAPSE, SORT_AGG_METRICS, // convert composite aggregate to terms or multi-terms bucket aggregate RARE_TOP, // convert composite aggregate to nested aggregate + SORT_EXPR // HIGHLIGHT, // NESTED } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortExprDigest.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortExprDigest.java new file mode 100644 index 00000000000..5b7fb1db320 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortExprDigest.java @@ -0,0 +1,105 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan.context; + +import java.util.List; +import lombok.AllArgsConstructor; +import lombok.Getter; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rex.RexNode; +import org.apache.commons.lang3.StringUtils; +import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; + +/** + * Information about a sort expression that has been pushed down to OpenSearch. Contains both the + * expression and its collation information. For simple field references, stores the field name for + * stability across schema changes. + */ +@Getter +@AllArgsConstructor +public class SortExprDigest { + /** The RexNode expression being sorted (nullable for simple field references) */ + private final RexNode expression; + + /** The field name for simple field references (nullable for complex expressions) */ + private final String fieldName; + + /** The collation information (direction, null handling) */ + private final RelFieldCollation.Direction direction; + + /** The null direction */ + private final RelFieldCollation.NullDirection nullDirection; + + /** + * Constructor for complex expressions. + * + * @param expression The RexNode expression + * @param direction Sort direction + * @param nullDirection Null handling direction + */ + public SortExprDigest( + RexNode expression, + RelFieldCollation.Direction direction, + RelFieldCollation.NullDirection nullDirection) { + this(expression, null, direction, nullDirection); + } + + /** + * Constructor for simple field references. + * + * @param fieldName The field name + * @param direction Sort direction + * @param nullDirection Null handling direction + */ + public SortExprDigest( + String fieldName, + RelFieldCollation.Direction direction, + RelFieldCollation.NullDirection nullDirection) { + this(null, fieldName, direction, nullDirection); + } + + /** + * Check if this is a simple field reference. + * + * @return true if this represents a simple field reference, false for complex expressions + */ + public boolean isSimpleFieldReference() { + return expression == null && !StringUtils.isEmpty(fieldName); + } + + /** + * Get the effective expression for this sort info. For simple field references, creates a + * RexInputRef based on the current scan schema. + * + * @param scan The scan to get the current schema from + * @return The RexNode expression to use for sorting + */ + public RexNode getEffectiveExpression(AbstractCalciteIndexScan scan) { + if (isSimpleFieldReference()) { + // Find the field index in the current scan schema + List currentFieldNames = scan.getRowType().getFieldNames(); + int fieldIndex = currentFieldNames.indexOf(fieldName); + if (fieldIndex >= 0) { + // Create a RexInputRef for this field + return scan.getCluster() + .getRexBuilder() + .makeInputRef(scan.getRowType().getFieldList().get(fieldIndex).getType(), fieldIndex); + } + // Field not found in current schema - this shouldn't happen in normal cases + return null; + } else { + // Complex expression - return as-is + return expression; + } + } + + @Override + public String toString() { + String sortTarget = isSimpleFieldReference() ? fieldName : expression.toString(); + return String.format( + "%s %s NULLS_%s", sortTarget, direction.toString(), nullDirection.toString()); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java index a03fb6268ba..1644d9d0509 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java @@ -77,8 +77,10 @@ import org.opensearch.index.fielddata.ScriptDocValues; import org.opensearch.script.AggregationScript; import org.opensearch.script.FilterScript; +import org.opensearch.script.NumberSortScript; import org.opensearch.script.ScriptContext; import org.opensearch.script.ScriptEngine; +import org.opensearch.script.StringSortScript; import org.opensearch.search.lookup.SourceLookup; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.data.model.ExprIpValue; @@ -88,6 +90,8 @@ import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; import org.opensearch.sql.opensearch.storage.script.aggregation.CalciteAggregationScriptFactory; import org.opensearch.sql.opensearch.storage.script.filter.CalciteFilterScriptFactory; +import org.opensearch.sql.opensearch.storage.script.sort.CalciteNumberSortScriptFactory; +import org.opensearch.sql.opensearch.storage.script.sort.CalciteStringSortScriptFactory; import org.opensearch.sql.opensearch.storage.serde.RelJsonSerializer; /** @@ -115,6 +119,8 @@ public CalciteScriptEngine(RelOptCluster relOptCluster) { BiFunction, RelDataType, Object>>() .put(FilterScript.CONTEXT, CalciteFilterScriptFactory::new) .put(AggregationScript.CONTEXT, CalciteAggregationScriptFactory::new) + .put(NumberSortScript.CONTEXT, CalciteNumberSortScriptFactory::new) + .put(StringSortScript.CONTEXT, CalciteStringSortScriptFactory::new) .build(); @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScript.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScript.java new file mode 100644 index 00000000000..f368ac184eb --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScript.java @@ -0,0 +1,64 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.sort; + +import java.util.Map; +import lombok.EqualsAndHashCode; +import org.apache.calcite.DataContext; +import org.apache.calcite.linq4j.function.Function1; +import org.apache.calcite.rel.RelFieldCollation.Direction; +import org.apache.calcite.rel.RelFieldCollation.NullDirection; +import org.apache.lucene.index.LeafReaderContext; +import org.opensearch.script.NumberSortScript; +import org.opensearch.search.lookup.SearchLookup; +import org.opensearch.search.lookup.SourceLookup; +import org.opensearch.sql.calcite.utils.PlanUtils; +import org.opensearch.sql.opensearch.storage.script.core.CalciteScript; + +/** Calcite number sort script. */ +@EqualsAndHashCode(callSuper = false) +public class CalciteNumberSortScript extends NumberSortScript { + + /** Calcite script. */ + private final CalciteScript calciteScript; + + private final SourceLookup sourceLookup; + private final Direction direction; + private final NullDirection nullDirection; + + public CalciteNumberSortScript( + Function1 function, + SearchLookup lookup, + LeafReaderContext context, + Map params) { + super(params, lookup, context); + this.calciteScript = new CalciteScript(function, params); + // TODO: we'd better get source from the leafLookup of super once it's available + this.sourceLookup = lookup.getLeafSearchLookup(context).source(); + this.direction = + params.containsKey(PlanUtils.DIRECTION) + ? Direction.valueOf((String) params.get(PlanUtils.DIRECTION)) + : Direction.ASCENDING; + this.nullDirection = + params.containsKey(PlanUtils.NULL_DIRECTION) + ? NullDirection.valueOf((String) params.get(PlanUtils.NULL_DIRECTION)) + : NullDirection.FIRST; + } + + @Override + public double execute() { + Object value = calciteScript.execute(this.getDoc(), this.sourceLookup)[0]; + // There is a limitation here when the Double value is exactly theoretical min/max value. + // It can't distinguish the ordering between null and exact Double.NEGATIVE_INFINITY or + // Double.NaN. + if (value == null) { + boolean isAscending = direction == Direction.ASCENDING; + boolean isNullFirst = nullDirection == NullDirection.FIRST; + return isAscending == isNullFirst ? Double.NEGATIVE_INFINITY : Double.NaN; + } + return ((Number) value).doubleValue(); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptFactory.java new file mode 100644 index 00000000000..426fa5472b7 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptFactory.java @@ -0,0 +1,38 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.sort; + +import java.util.Map; +import lombok.EqualsAndHashCode; +import org.apache.calcite.DataContext; +import org.apache.calcite.linq4j.function.Function1; +import org.apache.calcite.rel.type.RelDataType; +import org.opensearch.script.NumberSortScript; +import org.opensearch.search.lookup.SearchLookup; + +/** Calcite number sort script factory that generates leaf factory. */ +@EqualsAndHashCode(callSuper = false) +public class CalciteNumberSortScriptFactory implements NumberSortScript.Factory { + + /** Generated code of calcite to execute. */ + private final Function1 function; + + public CalciteNumberSortScriptFactory( + Function1 function, RelDataType type) { + this.function = function; + } + + @Override + public boolean isResultDeterministic() { + // This implies the results are cacheable + return true; + } + + @Override + public NumberSortScript.LeafFactory newFactory(Map params, SearchLookup lookup) { + return new CalciteNumberSortScriptLeafFactory(function, params, lookup); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptLeafFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptLeafFactory.java new file mode 100644 index 00000000000..703a8946e43 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptLeafFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.sort; + +import java.io.IOException; +import java.util.Map; +import lombok.EqualsAndHashCode; +import org.apache.calcite.DataContext; +import org.apache.calcite.linq4j.function.Function1; +import org.apache.lucene.index.LeafReaderContext; +import org.opensearch.script.NumberSortScript; +import org.opensearch.search.lookup.SearchLookup; + +/** Calcite number sort script leaf factory that produces script executor for each leaf. */ +@EqualsAndHashCode(callSuper = false) +class CalciteNumberSortScriptLeafFactory implements NumberSortScript.LeafFactory { + + private final Function1 function; + + /** Parameters for the calcite script. */ + private final Map params; + + /** Document lookup that returns doc values. */ + private final SearchLookup lookup; + + public CalciteNumberSortScriptLeafFactory( + Function1 function, Map params, SearchLookup lookup) { + this.function = function; + this.params = params; + this.lookup = lookup; + } + + @Override + public NumberSortScript newInstance(LeafReaderContext context) throws IOException { + return new CalciteNumberSortScript(function, lookup, context, params); + } + + @Override + public boolean needs_score() { + return false; + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScript.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScript.java new file mode 100644 index 00000000000..8e73cc0da97 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScript.java @@ -0,0 +1,66 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.sort; + +import java.util.Map; +import lombok.EqualsAndHashCode; +import org.apache.calcite.DataContext; +import org.apache.calcite.linq4j.function.Function1; +import org.apache.calcite.rel.RelFieldCollation.Direction; +import org.apache.calcite.rel.RelFieldCollation.NullDirection; +import org.apache.lucene.index.LeafReaderContext; +import org.opensearch.script.StringSortScript; +import org.opensearch.search.lookup.SearchLookup; +import org.opensearch.search.lookup.SourceLookup; +import org.opensearch.sql.calcite.utils.PlanUtils; +import org.opensearch.sql.opensearch.storage.script.core.CalciteScript; + +/** Calcite string sort script. */ +@EqualsAndHashCode(callSuper = false) +public class CalciteStringSortScript extends StringSortScript { + + /** Calcite script. */ + private final CalciteScript calciteScript; + + private final SourceLookup sourceLookup; + private final Direction direction; + private final NullDirection nullDirection; + + private static final String MAX_SENTINEL = "\uFFFF\uFFFF_NULL_PLACEHOLDER_"; + private static final String MIN_SENTINEL = "\u0000\u0000_NULL_PLACEHOLDER_"; + + public CalciteStringSortScript( + Function1 function, + SearchLookup lookup, + LeafReaderContext context, + Map params) { + super(params, lookup, context); + this.calciteScript = new CalciteScript(function, params); + // TODO: we'd better get source from the leafLookup of super once it's available + this.sourceLookup = lookup.getLeafSearchLookup(context).source(); + this.direction = + params.containsKey(PlanUtils.DIRECTION) + ? Direction.valueOf((String) params.get(PlanUtils.DIRECTION)) + : Direction.ASCENDING; + this.nullDirection = + params.containsKey(PlanUtils.NULL_DIRECTION) + ? NullDirection.valueOf((String) params.get(PlanUtils.NULL_DIRECTION)) + : NullDirection.FIRST; + } + + @Override + public String execute() { + Object value = calciteScript.execute(this.getDoc(), this.sourceLookup)[0]; + // There is a limitation here when the String value is larger or smaller than sentinel values. + // It can't guarantee the lexigraphic ordering between null and special strings. + if (value == null) { + boolean isAscending = direction == Direction.ASCENDING; + boolean isNullFirst = nullDirection == NullDirection.FIRST; + return isAscending == isNullFirst ? MIN_SENTINEL : MAX_SENTINEL; + } + return value.toString(); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptFactory.java new file mode 100644 index 00000000000..053361724e7 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptFactory.java @@ -0,0 +1,38 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.sort; + +import java.util.Map; +import lombok.EqualsAndHashCode; +import org.apache.calcite.DataContext; +import org.apache.calcite.linq4j.function.Function1; +import org.apache.calcite.rel.type.RelDataType; +import org.opensearch.script.StringSortScript; +import org.opensearch.search.lookup.SearchLookup; + +/** Calcite string sort script factory that generates leaf factory. */ +@EqualsAndHashCode(callSuper = false) +public class CalciteStringSortScriptFactory implements StringSortScript.Factory { + + /** Generated code of calcite to execute. */ + private final Function1 function; + + public CalciteStringSortScriptFactory( + Function1 function, RelDataType type) { + this.function = function; + } + + @Override + public boolean isResultDeterministic() { + // This implies the results are cacheable + return true; + } + + @Override + public StringSortScript.LeafFactory newFactory(Map params, SearchLookup lookup) { + return new CalciteStringSortScriptLeafFactory(function, params, lookup); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptLeafFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptLeafFactory.java new file mode 100644 index 00000000000..a95ee30d59d --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptLeafFactory.java @@ -0,0 +1,41 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.sort; + +import java.io.IOException; +import java.util.Map; +import lombok.EqualsAndHashCode; +import org.apache.calcite.DataContext; +import org.apache.calcite.linq4j.function.Function1; +import org.apache.lucene.index.LeafReaderContext; +import org.opensearch.script.StringSortScript; +import org.opensearch.search.lookup.SearchLookup; + +/** Calcite string sort script leaf factory. */ +@EqualsAndHashCode(callSuper = false) +public class CalciteStringSortScriptLeafFactory implements StringSortScript.LeafFactory { + + /** Generated code of calcite to execute. */ + private final Function1 function; + + /** Script parameters. */ + private final Map params; + + /** Search lookup. */ + private final SearchLookup lookup; + + public CalciteStringSortScriptLeafFactory( + Function1 function, Map params, SearchLookup lookup) { + this.function = function; + this.params = params; + this.lookup = lookup; + } + + @Override + public StringSortScript newInstance(LeafReaderContext context) throws IOException { + return new CalciteStringSortScript(function, lookup, context, params); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java index ab3743aeaf4..90738a267ff 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java @@ -10,9 +10,14 @@ import java.util.BitSet; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import lombok.experimental.UtilityClass; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelFieldCollation.Direction; +import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; @@ -30,6 +35,9 @@ import org.apache.calcite.util.mapping.Mappings; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; +import org.opensearch.sql.opensearch.storage.scan.context.PushDownType; +import org.opensearch.sql.opensearch.storage.scan.context.SortExprDigest; @UtilityClass public class OpenSearchRelOptUtil { @@ -136,6 +144,33 @@ public static Optional> getOrderEquivalentInputInfo(RexNo } } + /** + * Suppose single project input is already sorted, this method evaluates whether the input field + * sort collation satisfies the simple project expression's output collation. + * + * @param sourceFieldCollation project input field collation + * @param targetFieldCollation simple project expression output collation + * @param orderEquivInfo equivalent order information that contains optional input index and + * reversed flag pair + * @return if single project input collation satisfies project expression output collation + */ + public boolean sourceCollationSatisfiesTargetCollation( + RelFieldCollation sourceFieldCollation, + RelFieldCollation targetFieldCollation, + Optional> orderEquivInfo) { + if (orderEquivInfo.isEmpty()) { + return false; + } + + int equivalentSourceIndex = orderEquivInfo.get().getLeft(); + Direction equivalentSourceDirection = + orderEquivInfo.get().getRight() + ? targetFieldCollation.getDirection().reverse() + : targetFieldCollation.getDirection(); + return equivalentSourceIndex == sourceFieldCollation.getFieldIndex() + && equivalentSourceDirection == sourceFieldCollation.getDirection(); + } + private static boolean isOrderPreservingCast(RelDataType src, RelDataType dst) { final SqlTypeName srcType = src.getSqlTypeName(); final SqlTypeName dstType = dst.getSqlTypeName(); @@ -274,4 +309,77 @@ private static String generateUniqueName(String baseName, Set usedNames) suffix++; } } + + /** + * Check if the scan can provide the required sort collation by matching toCollation's mapped + * project RexNodes with sort expressions from PushDownContext. + * + * @param scan The scan RelNode to check + * @param project The project node to match expressions against + * @param toCollation The required collation to match + * @param orderEquivInfoMap Order equivalence info to determine if output expression collation can + * be optimized to field collation + * @return true if scan can provide the required collation, false otherwise + */ + public static boolean canScanProvideSortCollation( + AbstractCalciteIndexScan scan, + Project project, + RelCollation toCollation, + Map>> orderEquivInfoMap) { + + // Check if the scan has sort expressions pushed down + if (scan.getPushDownContext().stream() + .noneMatch(operation -> operation.type() == PushDownType.SORT_EXPR)) { + return false; + } + + // Get the sort expression infos from the pushdown context + @SuppressWarnings("unchecked") + List sortExprDigests = + (List) scan.getPushDownContext().getDigestByType(PushDownType.SORT_EXPR); + if (sortExprDigests.isEmpty() + || sortExprDigests.size() < toCollation.getFieldCollations().size()) { + return false; + } + + for (int i = 0; i < toCollation.getFieldCollations().size(); i++) { + RelFieldCollation requiredFieldCollation = toCollation.getFieldCollations().get(i); + RexNode projectExpr = project.getProjects().get(requiredFieldCollation.getFieldIndex()); + SortExprDigest scanSortInfo = sortExprDigests.get(i); + // Get the effective expression for comparison + RexNode scanSortExpression = scanSortInfo.getEffectiveExpression(scan); + + // Check if the required project output matches the scan sort expression + if (scanSortExpression != null && scanSortExpression.equals(projectExpr)) { + // Check if the collation direction and null handling match + if (requiredFieldCollation.getDirection() == scanSortInfo.getDirection() + && requiredFieldCollation.nullDirection == scanSortInfo.getNullDirection()) { + // Direction or null handling mismatch + continue; + } + return false; + } + + // Check if sorting simple RexCall is equivalent to field sort + if (scanSortExpression instanceof RexInputRef && projectExpr instanceof RexCall) { + RexInputRef scanInputRef = (RexInputRef) scanSortExpression; + RelFieldCollation sourceCollation = + new RelFieldCollation( + scanInputRef.getIndex(), + scanSortInfo.getDirection(), + scanSortInfo.getNullDirection()); + if (sourceCollationSatisfiesTargetCollation( + sourceCollation, + requiredFieldCollation, + orderEquivInfoMap.get(requiredFieldCollation.getFieldIndex()))) { + continue; + } + } + + return false; + } + + // All required collations are matched + return true; + } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtilTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtilTest.java index a9790d6485e..9a78bbc813e 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtilTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtilTest.java @@ -8,10 +8,21 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import java.util.Arrays; +import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Optional; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelFieldCollation.Direction; +import org.apache.calcite.rel.RelFieldCollation.NullDirection; +import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeSystem; @@ -26,6 +37,11 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; +import org.opensearch.sql.opensearch.storage.scan.context.PushDownContext; +import org.opensearch.sql.opensearch.storage.scan.context.PushDownOperation; +import org.opensearch.sql.opensearch.storage.scan.context.PushDownType; +import org.opensearch.sql.opensearch.storage.scan.context.SortExprDigest; @ExtendWith(MockitoExtension.class) public class OpenSearchRelOptUtilTest { @@ -325,4 +341,399 @@ public void testNoDots() { List result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input); assertEquals(expected, result); } + + @Test + public void testSourceCollationSatisfiesTargetCollation_DirectInputRef() { + // Source collation: col0 ASC + // Target collation: col0 ASC (output index 0) + Optional> orderEquivInfo = Optional.of(Pair.of(0, false)); + + RelFieldCollation sourceCollation = new RelFieldCollation(0, Direction.ASCENDING); + RelFieldCollation targetCollation = new RelFieldCollation(0, Direction.ASCENDING); + + assertTrue( + OpenSearchRelOptUtil.sourceCollationSatisfiesTargetCollation( + sourceCollation, targetCollation, orderEquivInfo)); + } + + @Test + public void testSourceCollationSatisfiesTargetCollation_EmptyOrderEquivInfo() { + Optional> orderEquivInfo = Optional.empty(); + + RelFieldCollation sourceCollation = new RelFieldCollation(0, Direction.DESCENDING); + RelFieldCollation targetCollation = new RelFieldCollation(0, Direction.DESCENDING); + + assertFalse( + OpenSearchRelOptUtil.sourceCollationSatisfiesTargetCollation( + sourceCollation, targetCollation, orderEquivInfo)); + } + + @Test + public void testSourceCollationSatisfiesTargetCollation_DirectInputRefDescending() { + // Source collation: col0 DESC + // Target collation: col0 DESC (output index 0) + Optional> orderEquivInfo = Optional.of(Pair.of(0, false)); + + RelFieldCollation sourceCollation = new RelFieldCollation(0, Direction.DESCENDING); + RelFieldCollation targetCollation = new RelFieldCollation(0, Direction.DESCENDING); + + assertTrue( + OpenSearchRelOptUtil.sourceCollationSatisfiesTargetCollation( + sourceCollation, targetCollation, orderEquivInfo)); + } + + @Test + public void testSourceCollationSatisfiesTargetCollation_DirectionMismatch() { + // Source collation: col0 ASC + // Target collation: col0 DESC (output index 0) + Optional> orderEquivInfo = Optional.of(Pair.of(0, false)); + + RelFieldCollation sourceCollation = new RelFieldCollation(0, Direction.ASCENDING); + RelFieldCollation targetCollation = new RelFieldCollation(0, Direction.DESCENDING); + + assertFalse( + OpenSearchRelOptUtil.sourceCollationSatisfiesTargetCollation( + sourceCollation, targetCollation, orderEquivInfo)); + } + + @Test + public void testSourceCollationSatisfiesTargetCollation_IndexMismatch() { + // Source collation: col0 ASC + // Target collation: col1 ASC (output index 1) + Optional> orderEquivInfo = Optional.of(Pair.of(1, false)); + + RelFieldCollation sourceCollation = new RelFieldCollation(0, Direction.ASCENDING); + RelFieldCollation targetCollation = new RelFieldCollation(1, Direction.ASCENDING); + + assertFalse( + OpenSearchRelOptUtil.sourceCollationSatisfiesTargetCollation( + sourceCollation, targetCollation, orderEquivInfo)); + } + + @Test + public void testSourceCollationSatisfiesTargetCollation_DirectionFlipped() { + // Source collation: col0 ASC + // Target collation: -col0 DESC (output index 0) + Optional> orderEquivInfo = Optional.of(Pair.of(0, true)); + + RelFieldCollation sourceCollation = new RelFieldCollation(0, Direction.ASCENDING); + RelFieldCollation targetCollation = new RelFieldCollation(0, Direction.DESCENDING); + + assertTrue( + OpenSearchRelOptUtil.sourceCollationSatisfiesTargetCollation( + sourceCollation, targetCollation, orderEquivInfo)); + } + + @Test + public void testSourceCollationSatisfiesTargetCollation_DirectionFlippedMismatched() { + // Source collation: col0 ASC + // Target collation: -col0 ASC (output index 0) - should be DESC + Optional> orderEquivInfo = Optional.of(Pair.of(0, true)); + + RelFieldCollation sourceCollation = new RelFieldCollation(0, Direction.ASCENDING); + RelFieldCollation targetCollation = new RelFieldCollation(0, Direction.ASCENDING); + + assertFalse( + OpenSearchRelOptUtil.sourceCollationSatisfiesTargetCollation( + sourceCollation, targetCollation, orderEquivInfo)); + } + + @Test + public void testCanScanProvideSortCollation_EmptySortExprDigests() { + Map>> orderEquivInfoMap = Collections.emptyMap(); + AbstractCalciteIndexScan scan = createMockScanWithSort(Collections.emptyList()); + Project project = createMockProject(Arrays.asList(rexBuilder.makeInputRef(inputType, 0))); + RelCollation collation = RelCollations.of(new RelFieldCollation(0, Direction.ASCENDING)); + + assertFalse( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_InsufficientSortExprDigests() { + // Scan has 1 sort expression, but collation requires 2 + Map>> orderEquivInfoMap = Collections.emptyMap(); + RexNode scanExpr = rexBuilder.makeInputRef(inputType, 0); + SortExprDigest sortDigest = + new SortExprDigest(scanExpr, Direction.ASCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest)); + + Project project = + createMockProject( + Arrays.asList( + rexBuilder.makeInputRef(inputType, 0), rexBuilder.makeInputRef(inputType, 1))); + RelCollation collation = + RelCollations.of( + new RelFieldCollation(0, Direction.ASCENDING), + new RelFieldCollation(1, Direction.ASCENDING)); + + assertFalse( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_ExactMatch() { + // Scan sorts by col0 ASC, project outputs col0, collation requires col0 ASC + Map>> orderEquivInfoMap = + Map.of(0, Optional.of(Pair.of(0, false))); + RexNode scanExpr = rexBuilder.makeInputRef(inputType, 0); + RexNode projectExpr = rexBuilder.makeInputRef(inputType, 0); + SortExprDigest sortDigest = + new SortExprDigest(scanExpr, Direction.ASCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest)); + + Project project = createMockProject(Arrays.asList(projectExpr)); + RelCollation collation = + RelCollations.of(new RelFieldCollation(0, Direction.ASCENDING, NullDirection.LAST)); + + assertTrue( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_DirectionMismatch() { + // Scan sorts by col0 ASC, but collation requires col0 DESC + Map>> orderEquivInfoMap = + Map.of(0, Optional.of(Pair.of(0, false))); + RexNode scanExpr = rexBuilder.makeInputRef(inputType, 0); + RexNode projectExpr = rexBuilder.makeInputRef(inputType, 0); + SortExprDigest sortDigest = + new SortExprDigest(scanExpr, Direction.ASCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest)); + + Project project = createMockProject(Arrays.asList(projectExpr)); + RelCollation collation = + RelCollations.of(new RelFieldCollation(0, Direction.DESCENDING, NullDirection.LAST)); + + assertFalse( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_NullDirectionMismatch() { + // Scan sorts by col0 ASC NULLS LAST, but collation requires NULLS FIRST + Map>> orderEquivInfoMap = + Map.of(0, Optional.of(Pair.of(0, false))); + RexNode scanExpr = rexBuilder.makeInputRef(inputType, 0); + RexNode projectExpr = rexBuilder.makeInputRef(inputType, 0); + SortExprDigest sortDigest = + new SortExprDigest(scanExpr, Direction.ASCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest)); + + Project project = createMockProject(Arrays.asList(projectExpr)); + RelCollation collation = + RelCollations.of(new RelFieldCollation(0, Direction.ASCENDING, NullDirection.FIRST)); + + assertFalse( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_ProjectTransformation() { + // Scan sorts by col0 ASC, project outputs -col0, collation requires -col0 DESC + Map>> orderEquivInfoMap = + Map.of(0, Optional.of(Pair.of(0, true))); + RexNode scanExpr = rexBuilder.makeInputRef(inputType, 0); + RexNode projectExpr = + rexBuilder.makeCall(SqlStdOperatorTable.UNARY_MINUS, rexBuilder.makeInputRef(inputType, 0)); + SortExprDigest sortDigest = + new SortExprDigest(scanExpr, Direction.ASCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest)); + + Project project = createMockProject(Arrays.asList(projectExpr)); + RelCollation collation = + RelCollations.of(new RelFieldCollation(0, Direction.DESCENDING, NullDirection.LAST)); + + assertTrue( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_ExpressionMismatch() { + // Scan sorts by col0, but project outputs col1 + Map>> orderEquivInfoMap = + Map.of(0, Optional.of(Pair.of(1, false))); + RexNode scanExpr = rexBuilder.makeInputRef(inputType, 0); + RexNode projectExpr = rexBuilder.makeInputRef(inputType, 1); + SortExprDigest sortDigest = + new SortExprDigest(scanExpr, Direction.ASCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest)); + + Project project = createMockProject(Arrays.asList(projectExpr)); + RelCollation collation = + RelCollations.of(new RelFieldCollation(0, Direction.ASCENDING, NullDirection.LAST)); + + assertFalse( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_ComplexRexCall() { + // Scan sorts by (col0 + col1) ASC, project outputs (col0 + col1), collation requires (col0 + + // col1) ASC + Map>> orderEquivInfoMap = Map.of(0, Optional.empty()); + RexNode scanExpr = + rexBuilder.makeCall( + SqlStdOperatorTable.PLUS, + rexBuilder.makeInputRef(inputType, 0), + rexBuilder.makeInputRef(inputType, 1)); + RexNode projectExpr = + rexBuilder.makeCall( + SqlStdOperatorTable.PLUS, + rexBuilder.makeInputRef(inputType, 0), + rexBuilder.makeInputRef(inputType, 1)); + SortExprDigest sortDigest = + new SortExprDigest(scanExpr, Direction.ASCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest)); + + Project project = createMockProject(Arrays.asList(projectExpr)); + RelCollation collation = + RelCollations.of(new RelFieldCollation(0, Direction.ASCENDING, NullDirection.LAST)); + + assertTrue( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_ComplexRexCall_DifferentExpression() { + // Scan sorts by (col0 + 10), but project outputs (col0 + 20) - should not match + Map>> orderEquivInfoMap = + Map.of(0, Optional.of(Pair.of(0, false))); + RexNode scanExpr = + rexBuilder.makeCall( + SqlStdOperatorTable.PLUS, + rexBuilder.makeInputRef(inputType, 0), + rexBuilder.makeLiteral(10, inputType)); + RexNode projectExpr = + rexBuilder.makeCall( + SqlStdOperatorTable.PLUS, + rexBuilder.makeInputRef(inputType, 0), + rexBuilder.makeLiteral(20, inputType)); + SortExprDigest sortDigest = + new SortExprDigest(scanExpr, Direction.ASCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest)); + + Project project = createMockProject(Arrays.asList(projectExpr)); + RelCollation collation = + RelCollations.of(new RelFieldCollation(0, Direction.ASCENDING, NullDirection.LAST)); + + assertFalse( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_ComplexRexCall_MixedSimpleAndComplex() { + // Scan sorts by col0 ASC, (col1 + 5) DESC + // Project outputs col0, (col1 + 5) + // Collation requires col0 ASC, (col1 + 5) DESC + Map>> orderEquivInfoMap = + Map.of(0, Optional.of(Pair.of(0, false))); + RexNode scanExpr0 = rexBuilder.makeInputRef(inputType, 0); + RexNode scanExpr1 = + rexBuilder.makeCall( + SqlStdOperatorTable.PLUS, + rexBuilder.makeInputRef(inputType, 1), + rexBuilder.makeLiteral(5, inputType)); + + RexNode projectExpr0 = rexBuilder.makeInputRef(inputType, 0); + RexNode projectExpr1 = + rexBuilder.makeCall( + SqlStdOperatorTable.PLUS, + rexBuilder.makeInputRef(inputType, 1), + rexBuilder.makeLiteral(5, inputType)); + + SortExprDigest sortDigest0 = + new SortExprDigest(scanExpr0, Direction.ASCENDING, NullDirection.LAST); + SortExprDigest sortDigest1 = + new SortExprDigest(scanExpr1, Direction.DESCENDING, NullDirection.FIRST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest0, sortDigest1)); + + Project project = createMockProject(Arrays.asList(projectExpr0, projectExpr1)); + RelCollation collation = + RelCollations.of( + new RelFieldCollation(0, Direction.ASCENDING, NullDirection.LAST), + new RelFieldCollation(1, Direction.DESCENDING, NullDirection.FIRST)); + + assertTrue( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_ComplexRexCall_PartialMatch() { + // Scan sorts by (col0 + 10) ASC, col1 DESC + // Project outputs (col0 + 10), col1 + // Collation requires only (col0 + 10) ASC - should match (prefix match) + Map>> orderEquivInfoMap = + Map.of(0, Optional.of(Pair.of(0, false))); + RexNode scanExpr0 = + rexBuilder.makeCall( + SqlStdOperatorTable.PLUS, + rexBuilder.makeInputRef(inputType, 0), + rexBuilder.makeLiteral(10, inputType)); + RexNode scanExpr1 = rexBuilder.makeInputRef(inputType, 1); + + RexNode projectExpr0 = + rexBuilder.makeCall( + SqlStdOperatorTable.PLUS, + rexBuilder.makeInputRef(inputType, 0), + rexBuilder.makeLiteral(10, inputType)); + RexNode projectExpr1 = rexBuilder.makeInputRef(inputType, 1); + + SortExprDigest sortDigest0 = + new SortExprDigest(scanExpr0, Direction.ASCENDING, NullDirection.LAST); + SortExprDigest sortDigest1 = + new SortExprDigest(scanExpr1, Direction.DESCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest0, sortDigest1)); + + Project project = createMockProject(Arrays.asList(projectExpr0, projectExpr1)); + RelCollation collation = + RelCollations.of(new RelFieldCollation(0, Direction.ASCENDING, NullDirection.LAST)); + + assertTrue( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + private Project createMockProject(List projects) { + Project project = mock(Project.class, org.mockito.Mockito.withSettings().lenient()); + when(project.getProjects()).thenReturn(projects); + return project; + } + + // Create mock scan with list of sortExprDigest + private AbstractCalciteIndexScan createMockScanWithSort(List sortDigests) { + AbstractCalciteIndexScan scan = + mock(AbstractCalciteIndexScan.class, org.mockito.Mockito.withSettings().lenient()); + PushDownContext context = + mock(PushDownContext.class, org.mockito.Mockito.withSettings().lenient()); + PushDownOperation sortOperation = + mock(PushDownOperation.class, org.mockito.Mockito.withSettings().lenient()); + + when(scan.getPushDownContext()).thenReturn(context); + when(context.stream()).thenReturn(Arrays.asList(sortOperation).stream()); + when(sortOperation.type()).thenReturn(PushDownType.SORT_EXPR); + when(context.getDigestByType(PushDownType.SORT_EXPR)).thenReturn(sortDigests); + + // Mock the cluster and RexBuilder for getEffectiveExpression + RelOptCluster cluster = mock(RelOptCluster.class, org.mockito.Mockito.withSettings().lenient()); + when(scan.getCluster()).thenReturn(cluster); + when(cluster.getRexBuilder()).thenReturn(rexBuilder); + + // Mock the row type + RelDataType rowType = + typeFactory.builder().add("col0", inputType).add("col1", inputType).build(); + when(scan.getRowType()).thenReturn(rowType); + + return scan; + } }