Skip to content

Commit 8a53481

Browse files
authored
perf(cubestore): ProjectionAboveLimit query optimization (#8984)
1 parent ead97b4 commit 8a53481

File tree

3 files changed

+687
-21
lines changed

3 files changed

+687
-21
lines changed

rust/cubestore/cubestore-sql-tests/src/tests.rs

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4099,37 +4099,38 @@ async fn planning_topk_having(service: Box<dyn SqlClient>) {
40994099
\n Empty"
41004100
);
41014101

4102-
let p = service
4103-
.plan_query(
4104-
"SELECT `url` `url`, SUM(`hits`) `hits`, CARDINALITY(MERGE(`uhits`)) `uhits` \
4102+
let query = "SELECT `url` `url`, SUM(`hits`) `hits`, CARDINALITY(MERGE(`uhits`)) `uhits` \
41054103
FROM (SELECT * FROM s.Data1 \
41064104
UNION ALL \
41074105
SELECT * FROM s.Data2) AS `Data` \
41084106
GROUP BY 1 \
41094107
HAVING SUM(`hits`) > 10 AND CARDINALITY(MERGE(`uhits`)) > 5 \
41104108
ORDER BY 2 DESC \
4111-
LIMIT 3",
4112-
)
4113-
.await
4114-
.unwrap();
4109+
LIMIT 3";
4110+
let p = service.plan_query(query).await.unwrap();
41154111
let mut show_hints = PPOptions::default();
41164112
show_hints.show_filters = true;
41174113
assert_eq!(
41184114
pp_phys_plan_ext(p.worker.as_ref(), &show_hints),
4119-
"Projection, [url, SUM(Data.hits)@1:hits, CARDINALITY(MERGE(Data.uhits)@2):uhits]\
4120-
\n AggregateTopK, limit: 3, having: SUM(Data.hits)@1 > 10 AND CAST(CARDINALITY(MERGE(Data.uhits)@2) AS Int64) > 5\
4121-
\n Worker\
4122-
\n Sort\
4123-
\n FullInplaceAggregate\
4124-
\n MergeSort\
4125-
\n Union\
4126-
\n MergeSort\
4127-
\n Scan, index: default:1:[1]:sort_on[url], fields: *\
4128-
\n Empty\
4129-
\n MergeSort\
4130-
\n Scan, index: default:2:[2]:sort_on[url], fields: *\
4131-
\n Empty"
4115+
"Projection, [url, hits, CARDINALITY(MERGE(Data.uhits)@2):uhits]\
4116+
\n Projection, [url, SUM(Data.hits)@1:hits, MERGE(Data.uhits)@2:MERGE(uhits)]\
4117+
\n AggregateTopK, limit: 3, having: SUM(Data.hits)@1 > 10 AND CAST(CARDINALITY(MERGE(Data.uhits)@2) AS Int64) > 5\
4118+
\n Worker\
4119+
\n Sort\
4120+
\n FullInplaceAggregate\
4121+
\n MergeSort\
4122+
\n Union\
4123+
\n MergeSort\
4124+
\n Scan, index: default:1:[1]:sort_on[url], fields: *\
4125+
\n Empty\
4126+
\n MergeSort\
4127+
\n Scan, index: default:2:[2]:sort_on[url], fields: *\
4128+
\n Empty"
41324129
);
4130+
// Also execute the query: the column name MERGE(Data.uhits) printed for the top projection in the
4131+
// above assertion looks wrong (the inner projection emits MERGE(uhits)); the column index is right.
4132+
let result = service.exec_query(query).await.unwrap();
4133+
assert_eq!(result.len(), 0);
41334134
}
41344135
async fn planning_topk_hll(service: Box<dyn SqlClient>) {
41354136
service.exec_query("CREATE SCHEMA s").await.unwrap();

rust/cubestore/cubestore/src/queryplanner/mod.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ pub use planning::PlanningMeta;
88
mod check_memory;
99
pub mod physical_plan_flags;
1010
pub mod pretty_printers;
11+
mod projection_above_limit;
1112
pub mod query_executor;
1213
pub mod serialized_plan;
1314
mod tail_limit;
@@ -40,6 +41,7 @@ use crate::queryplanner::info_schema::{
4041
};
4142
use crate::queryplanner::now::MaterializeNow;
4243
use crate::queryplanner::planning::{choose_index_ext, ClusterSendNode};
44+
use crate::queryplanner::projection_above_limit::ProjectionAboveLimit;
4345
use crate::queryplanner::query_executor::{
4446
batches_to_dataframe, ClusterSendExec, InlineTableProvider,
4547
};
@@ -199,7 +201,8 @@ impl QueryPlannerImpl {
199201
ExecutionConfig::new()
200202
.with_metadata_cache_factory(self.metadata_cache_factory.clone())
201203
.add_optimizer_rule(Arc::new(MaterializeNow {}))
202-
.add_optimizer_rule(Arc::new(FlattenUnion {})),
204+
.add_optimizer_rule(Arc::new(FlattenUnion {}))
205+
.add_optimizer_rule(Arc::new(ProjectionAboveLimit {})),
203206
)))
204207
}
205208
}

0 commit comments

Comments
 (0)