Skip to content

Commit f7efc2c

Browse files
authored
feat(query): add rule_deduplicated_sort (#18544)
* feat(query): add rule_deduplicated_sort * fix code style * fix code style * fix * fix * fix test
1 parent bc44f58 commit f7efc2c

File tree

6 files changed

+137
-1
lines changed

6 files changed

+137
-1
lines changed

src/query/sql/src/planner/optimizer/optimizers/rule/factory.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use databend_common_exception::Result;
1818

1919
use crate::optimizer::optimizers::rule::RuleCommuteJoin;
2020
use crate::optimizer::optimizers::rule::RuleCommuteJoinBaseTable;
21+
use crate::optimizer::optimizers::rule::RuleDeduplicateSort;
2122
use crate::optimizer::optimizers::rule::RuleEagerAggregation;
2223
use crate::optimizer::optimizers::rule::RuleEliminateEvalScalar;
2324
use crate::optimizer::optimizers::rule::RuleEliminateFilter;
@@ -114,6 +115,7 @@ impl RuleFactory {
114115
RuleID::PushDownPrewhere => Ok(Box::new(RulePushDownPrewhere::new(metadata))),
115116
RuleID::TryApplyAggIndex => Ok(Box::new(RuleTryApplyAggIndex::new(metadata))),
116117
RuleID::EliminateSort => Ok(Box::new(RuleEliminateSort::new())),
118+
RuleID::DeduplicateSort => Ok(Box::new(RuleDeduplicateSort::new())),
117119
RuleID::SemiToInnerJoin => Ok(Box::new(RuleSemiToInnerJoin::new())),
118120
RuleID::MergeFilterIntoMutation => {
119121
Ok(Box::new(RuleMergeFilterIntoMutation::new(metadata)))

src/query/sql/src/planner/optimizer/optimizers/rule/rule.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ use crate::optimizer::optimizers::rule::TransformResult;
2727
pub static DEFAULT_REWRITE_RULES: LazyLock<Vec<RuleID>> = LazyLock::new(|| {
2828
vec![
2929
RuleID::EliminateSort,
30+
RuleID::DeduplicateSort,
3031
RuleID::EliminateUnion,
3132
RuleID::MergeEvalScalar,
3233
// Filter
@@ -113,6 +114,7 @@ pub enum RuleID {
113114
EliminateEvalScalar,
114115
EliminateFilter,
115116
EliminateSort,
117+
DeduplicateSort,
116118
MergeEvalScalar,
117119
MergeFilter,
118120
GroupingSetsToUnion,
@@ -158,6 +160,7 @@ impl Display for RuleID {
158160
RuleID::EliminateEvalScalar => write!(f, "EliminateEvalScalar"),
159161
RuleID::EliminateFilter => write!(f, "EliminateFilter"),
160162
RuleID::EliminateSort => write!(f, "EliminateSort"),
163+
RuleID::DeduplicateSort => write!(f, "DeduplicateSort"),
161164
RuleID::MergeEvalScalar => write!(f, "MergeEvalScalar"),
162165
RuleID::MergeFilter => write!(f, "MergeFilter"),
163166
RuleID::NormalizeScalarFilter => write!(f, "NormalizeScalarFilter"),

src/query/sql/src/planner/optimizer/optimizers/rule/sort_rules/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
mod rule_deduplicate_sort;
1516
mod rule_eliminate_sort;
1617

18+
pub use rule_deduplicate_sort::RuleDeduplicateSort;
1719
pub use rule_eliminate_sort::RuleEliminateSort;
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
// Copyright 2021 Datafuse Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use databend_common_exception::Result;
16+
17+
use crate::optimizer::ir::Matcher;
18+
use crate::optimizer::ir::SExpr;
19+
use crate::optimizer::optimizers::rule::Rule;
20+
use crate::optimizer::optimizers::rule::RuleID;
21+
use crate::optimizer::optimizers::rule::TransformResult;
22+
use crate::plans::RelOp;
23+
use crate::plans::Sort;
24+
25+
/// Rule to remove duplicate sort items in ORDER BY clause.
26+
///
27+
/// For example, transforms:
28+
/// ORDER BY x ASC NULLS LAST, x ASC NULLS LAST
29+
/// into:
30+
/// ORDER BY x ASC NULLS LAST
31+
///
32+
/// This optimization is valid because duplicate sort fields don't contribute
33+
/// additional ordering and only add unnecessary computational overhead.
34+
pub struct RuleDeduplicateSort {
35+
id: RuleID,
36+
matchers: Vec<Matcher>,
37+
}
38+
39+
impl RuleDeduplicateSort {
40+
pub fn new() -> Self {
41+
Self {
42+
id: RuleID::DeduplicateSort,
43+
// Sort
44+
// \
45+
// *
46+
matchers: vec![Matcher::MatchOp {
47+
op_type: RelOp::Sort,
48+
children: vec![Matcher::Leaf],
49+
}],
50+
}
51+
}
52+
}
53+
54+
impl Rule for RuleDeduplicateSort {
55+
fn id(&self) -> RuleID {
56+
self.id
57+
}
58+
59+
fn apply(&self, s_expr: &SExpr, state: &mut TransformResult) -> Result<()> {
60+
let sort: Sort = s_expr.plan().clone().try_into()?;
61+
62+
if sort.items.len() <= 1 {
63+
return Ok(());
64+
}
65+
66+
// Deduplicate sort items while preserving order
67+
let mut deduplicated_items = Vec::with_capacity(sort.items.len());
68+
let mut seen = std::collections::HashSet::with_capacity(sort.items.len());
69+
70+
for item in &sort.items {
71+
if seen.insert(item.clone()) {
72+
deduplicated_items.push(item.clone());
73+
}
74+
}
75+
76+
// Only apply transformation if we actually removed duplicates
77+
if deduplicated_items.len() == sort.items.len() {
78+
return Ok(());
79+
}
80+
81+
let new_sort = Sort {
82+
items: deduplicated_items,
83+
limit: sort.limit,
84+
after_exchange: sort.after_exchange,
85+
pre_projection: sort.pre_projection,
86+
window_partition: sort.window_partition,
87+
};
88+
89+
let mut result = s_expr.replace_plan(std::sync::Arc::new(new_sort.into()));
90+
result.set_applied_rule(&self.id);
91+
state.add_result(result);
92+
93+
Ok(())
94+
}
95+
96+
fn matchers(&self) -> &[Matcher] {
97+
&self.matchers
98+
}
99+
}
100+
101+
impl Default for RuleDeduplicateSort {
102+
fn default() -> Self {
103+
Self::new()
104+
}
105+
}

tests/sqllogictests/suites/mode/standalone/explain/eliminate_sort.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ explain select * from (select * from numbers(10) t order by t.number desc) order
5858
----
5959
Sort(Single)
6060
├── output columns: [t.number (#0)]
61-
├── sort keys: [number DESC NULLS LAST, number DESC NULLS LAST]
61+
├── sort keys: [number DESC NULLS LAST]
6262
├── estimated rows: 10.00
6363
└── Sort(Single)
6464
├── output columns: [t.number (#0)]
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
statement ok
2+
CREATE TABLE emp (deptno INT, job STRING, sal INT);
3+
4+
query T
5+
EXPLAIN SELECT deptno AS d, job, sal
6+
FROM emp
7+
ORDER BY deptno, d, job ASC, job DESC, sal NULLS FIRST, sal NULLS LAST, deptno;
8+
----
9+
Sort(Single)
10+
├── output columns: [emp.deptno (#0), emp.job (#1), emp.sal (#2)]
11+
├── sort keys: [deptno ASC NULLS LAST, job ASC NULLS LAST, job DESC NULLS LAST, sal ASC NULLS FIRST, sal ASC NULLS LAST]
12+
├── estimated rows: 0.00
13+
└── TableScan
14+
├── table: default.default.emp
15+
├── output columns: [deptno (#0), job (#1), sal (#2)]
16+
├── read rows: 0
17+
├── read size: 0
18+
├── partitions total: 0
19+
├── partitions scanned: 0
20+
├── push downs: [filters: [], limit: NONE]
21+
└── estimated rows: 0.00
22+
23+
statement ok
24+
DROP TABLE emp;

0 commit comments

Comments
 (0)