Skip to content

Commit d8423f8

Browse files
authored
chore(cubesql): Add benchmarks for large data models
1 parent 7fc7880 commit d8423f8

File tree

5 files changed

+242
-8
lines changed

5 files changed

+242
-8
lines changed

rust/cubesql/cubesql/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,7 @@ harness = false
8181
[[bench]]
8282
name = "benchmarks"
8383
harness = false
84+
85+
[[bench]]
86+
name = "large_model"
87+
harness = false

rust/cubesql/cubesql/benches/benchmarks.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
use criterion::{criterion_group, criterion_main, Criterion};
2-
use cubesql::compile::test::rewrite_engine::{
3-
cube_context, query_to_logical_plan, rewrite_rules, rewrite_runner,
2+
use cubesql::compile::test::{
3+
get_test_tenant_ctx,
4+
rewrite_engine::{cube_context, query_to_logical_plan, rewrite_rules, rewrite_runner},
45
};
56
use itertools::Itertools;
67
use std::sync::Arc;
78

89
macro_rules! bench_func {
910
($NAME:expr, $QUERY:expr, $CRITERION:expr) => {{
10-
let context = Arc::new(futures::executor::block_on(cube_context()));
11+
let context = Arc::new(futures::executor::block_on(cube_context(
12+
get_test_tenant_ctx(),
13+
)));
1114
let plan = query_to_logical_plan($QUERY, &context);
1215
let rules = rewrite_rules(context.clone());
1316

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
use std::{env::set_var, sync::Arc, time::Duration};
2+
3+
use criterion::{criterion_group, criterion_main, Criterion};
4+
5+
use cubeclient::models::{V1CubeMeta, V1CubeMetaDimension, V1CubeMetaMeasure};
6+
use cubesql::compile::{
7+
test::{
8+
rewrite_engine::{cube_context, query_to_logical_plan, rewrite_rules, rewrite_runner},
9+
sql_generator,
10+
},
11+
MetaContext,
12+
};
13+
use egg::StopReason;
14+
use itertools::Itertools;
15+
use uuid::Uuid;
16+
17+
/// Registers one rewrite-engine benchmark for a query against a generated
/// large data model.
///
/// Arguments:
/// * `$DIMS` - number of dimensions of the generated cube; it is passed to
///   `get_large_model_test_tenant_ctx` and to `$QUERY_FN`, and becomes part
///   of the benchmark id.
/// * `$NAME` - label appended to the benchmark id
///   (`large_model_<dims>_<name>`).
/// * `$QUERY_FN` - callable that takes the dimension count and returns the
///   SQL text to plan.
/// * `$CRITERION` - the Criterion instance the benchmark is registered on.
///
/// The context, the logical plan and the rewrite rules are built once, outside
/// the timed closure. Every timed iteration clones all three and runs the
/// rewriter on the clones.
///
/// # Panics
/// Panics when a run ends with any stop reason other than
/// `StopReason::Saturated`.
macro_rules! bench_large_model {
    ($DIMS:expr, $NAME:expr, $QUERY_FN:expr, $CRITERION:expr) => {{
        // Bind the argument once: the expansion needs the value in three
        // places and must not re-evaluate the caller's expression each time.
        let dims: usize = $DIMS;

        let context = Arc::new(futures::executor::block_on(cube_context(
            get_large_model_test_tenant_ctx(dims),
        )));
        let plan = query_to_logical_plan($QUERY_FN(dims), &context);
        let rules = rewrite_rules(context.clone());

        let bench_name = format!("large_model_{}_{}", dims, $NAME);
        $CRITERION.bench_function(&bench_name, |b| {
            b.iter(|| {
                let context = context.clone();
                let plan = plan.clone();
                let rules = rules.clone();

                let runner = rewrite_runner(plan, context);
                let stop_reason = runner.run(&rules).stop_reason.unwrap();
                // Anything but saturation means the rewrite did not run to
                // completion, so the measured time would be meaningless.
                assert!(
                    matches!(stop_reason, StopReason::Saturated),
                    "Error running {} benchmark: stop reason is {:?}",
                    bench_name,
                    stop_reason
                );
            })
        });
    }};
}
44+
45+
pub fn get_large_model_test_tenant_ctx(dims: usize) -> Arc<MetaContext> {
46+
Arc::new(MetaContext::new(
47+
get_large_model_test_meta(dims),
48+
vec![(format!("LargeCube_{}", dims), "default".to_string())]
49+
.into_iter()
50+
.collect(),
51+
vec![("default".to_string(), sql_generator(vec![]))]
52+
.into_iter()
53+
.collect(),
54+
Uuid::new_v4(),
55+
))
56+
}
57+
58+
pub fn get_large_model_test_meta(dims: usize) -> Vec<V1CubeMeta> {
59+
if dims < 1 {
60+
panic!("Number of dimensions should be at least 1");
61+
}
62+
63+
let cube_name = format!("LargeCube_{}", dims);
64+
vec![V1CubeMeta {
65+
name: cube_name.clone(),
66+
title: None,
67+
measures: vec![
68+
V1CubeMetaMeasure {
69+
name: format!("{}.count", cube_name),
70+
title: None,
71+
_type: "number".to_string(),
72+
agg_type: Some("count".to_string()),
73+
},
74+
V1CubeMetaMeasure {
75+
name: format!("{}.sum", cube_name),
76+
title: None,
77+
_type: "number".to_string(),
78+
agg_type: Some("sum".to_string()),
79+
},
80+
],
81+
dimensions: (1..=dims)
82+
.map(|n| V1CubeMetaDimension {
83+
name: format!("{}.n{}", cube_name, n),
84+
_type: "number".to_string(),
85+
})
86+
.collect(),
87+
segments: vec![],
88+
joins: None,
89+
}]
90+
}
91+
92+
/// Returns a query that selects only the first dimension (`n1`) of
/// `LargeCube_<dims>` and groups by it.
fn select_one_dimension(dims: usize) -> String {
    let cube = format!("LargeCube_{}", dims);
    format!(
        "\n\
         SELECT n1 AS n1\n\
         FROM {}\n\
         GROUP BY 1\n",
        cube,
    )
}
102+
103+
/// Returns a `SELECT *` query over `LargeCube_<dims>`.
fn select_wildcard(dims: usize) -> String {
    let cube = format!("LargeCube_{}", dims);
    format!(
        "\n\
         SELECT *\n\
         FROM {}\n",
        cube,
    )
}
112+
113+
// NOTE(review): with these builders on std `join`, the file-level
// `use itertools::Itertools;` appears to have no remaining users in this
// bench target; confirm and drop it in a separate change.

/// Renders one fragment per dimension index `1..=dims` and joins the
/// fragments with `separator`.
fn join_per_dimension(
    dims: usize,
    separator: &str,
    render: impl Fn(usize) -> String,
) -> String {
    (1..=dims).map(render).collect::<Vec<_>>().join(separator)
}

/// Comma-separated projection list: `n1 AS n1, n2 AS n2, ...`.
fn dimension_projections(dims: usize) -> String {
    join_per_dimension(dims, ", ", |n| format!("n{} AS n{}", n, n))
}

/// Comma-separated GROUP BY ordinals: `1, 2, ...`.
fn group_by_ordinals(dims: usize) -> String {
    join_per_dimension(dims, ", ", |n| n.to_string())
}

/// Returns a query that selects every dimension of `LargeCube_<dims>` and
/// groups by all of them (by ordinal).
fn select_all_dimensions(dims: usize) -> String {
    format!(
        "\n\
         SELECT {}\n\
         FROM LargeCube_{}\n\
         GROUP BY {}\n",
        dimension_projections(dims),
        dims,
        group_by_ordinals(dims),
    )
}

/// Same as `select_all_dimensions`, with a single `n1 > 10` filter added.
fn select_all_dimensions_with_filter(dims: usize) -> String {
    format!(
        "\n\
         SELECT {}\n\
         FROM LargeCube_{}\n\
         WHERE n1 > 10\n\
         GROUP BY {}\n",
        dimension_projections(dims),
        dims,
        group_by_ordinals(dims),
    )
}

/// Same as `select_all_dimensions`, with one `nK > 10` filter per dimension,
/// all combined with `AND`.
fn select_many_filters(dims: usize) -> String {
    let filters = join_per_dimension(dims, " AND ", |n| format!("n{} > 10", n));
    format!(
        "\n\
         SELECT {}\n\
         FROM LargeCube_{}\n\
         WHERE {}\n\
         GROUP BY {}\n",
        dimension_projections(dims),
        dims,
        filters,
        group_by_ordinals(dims),
    )
}
173+
174+
fn large_model_100_dims(c: &mut Criterion) {
175+
// This is required for `select_many_filters` test, remove after flattening filters
176+
set_var("CUBESQL_REWRITE_MAX_NODES", "100000");
177+
178+
let dims = 100;
179+
bench_large_model!(dims, "select_one_dimension", select_one_dimension, c);
180+
bench_large_model!(dims, "select_wildcard", select_wildcard, c);
181+
bench_large_model!(dims, "select_all_dimensions", select_all_dimensions, c);
182+
bench_large_model!(
183+
dims,
184+
"select_all_dimensions_with_filter",
185+
select_all_dimensions_with_filter,
186+
c
187+
);
188+
bench_large_model!(dims, "select_many_filters", select_many_filters, c);
189+
}
190+
191+
fn large_model_300_dims(c: &mut Criterion) {
192+
let dims = 300;
193+
bench_large_model!(dims, "select_one_dimension", select_one_dimension, c);
194+
bench_large_model!(dims, "select_wildcard", select_wildcard, c);
195+
bench_large_model!(dims, "select_all_dimensions", select_all_dimensions, c);
196+
bench_large_model!(
197+
dims,
198+
"select_all_dimensions_with_filter",
199+
select_all_dimensions_with_filter,
200+
c
201+
);
202+
// `select_many_filters` takes too long with 300 filters; requires flattening
203+
//bench_large_model!(dims, "select_many_filters", select_many_filters, c);
204+
}
205+
206+
fn large_model_1000_dims(c: &mut Criterion) {
207+
let dims = 1000;
208+
bench_large_model!(dims, "select_one_dimension", select_one_dimension, c);
209+
bench_large_model!(dims, "select_wildcard", select_wildcard, c);
210+
bench_large_model!(dims, "select_all_dimensions", select_all_dimensions, c);
211+
bench_large_model!(
212+
dims,
213+
"select_all_dimensions_with_filter",
214+
select_all_dimensions_with_filter,
215+
c
216+
);
217+
// `select_many_filters` takes too long with 1000 filters; requires flattening
218+
//bench_large_model!(dims, "select_many_filters", select_many_filters, c);
219+
}
220+
221+
criterion_group! {
222+
name = large_model;
223+
config = Criterion::default().measurement_time(Duration::from_secs(15)).sample_size(10);
224+
targets = large_model_100_dims, large_model_300_dims, large_model_1000_dims
225+
}
226+
criterion_main!(large_model);

rust/cubesql/cubesql/src/compile/test/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,9 @@ pub fn get_test_tenant_ctx_customized(custom_templates: Vec<(String, String)>) -
403403
))
404404
}
405405

406-
fn sql_generator(custom_templates: Vec<(String, String)>) -> Arc<dyn SqlGenerator + Send + Sync> {
406+
pub fn sql_generator(
407+
custom_templates: Vec<(String, String)>,
408+
) -> Arc<dyn SqlGenerator + Send + Sync> {
407409
Arc::new(SqlGeneratorMock {
408410
sql_templates: Arc::new(
409411
SqlTemplates::new(

rust/cubesql/cubesql/src/compile/test/rewrite_engine.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use datafusion::{
66
};
77
use egg::Rewrite;
88

9-
use super::{get_test_session, get_test_tenant_ctx};
9+
use super::get_test_session;
1010
use crate::{
1111
compile::{
1212
engine::provider::CubeContext,
@@ -17,14 +17,13 @@ use crate::{
1717
rewriter::Rewriter,
1818
LogicalPlanLanguage,
1919
},
20-
rewrite_statement, QueryPlanner,
20+
rewrite_statement, MetaContext, QueryPlanner,
2121
},
2222
config::{ConfigObj, ConfigObjImpl},
2323
sql::session::DatabaseProtocol,
2424
};
2525

26-
pub async fn cube_context() -> CubeContext {
27-
let meta = get_test_tenant_ctx();
26+
pub async fn cube_context(meta: Arc<MetaContext>) -> CubeContext {
2827
let session = get_test_session(DatabaseProtocol::PostgreSQL, meta.clone()).await;
2928
let planner = QueryPlanner::new(session.state.clone(), meta, session.session_manager.clone());
3029
let ctx = planner.create_execution_ctx();

0 commit comments

Comments
 (0)