Skip to content

Commit 66aa01d

Browse files
authored
feat(cubesql): Flatten IN lists expressions to improve performance (#8235)
1 parent 2c56be6 commit 66aa01d

File tree

7 files changed

+185
-135
lines changed

7 files changed

+185
-135
lines changed

rust/cubesql/cubesql/benches/benchmarks.rs

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use criterion::{criterion_group, criterion_main, Criterion};
22
use cubesql::compile::test::rewrite_engine::{
33
cube_context, query_to_logical_plan, rewrite_rules, rewrite_runner,
44
};
5+
use itertools::Itertools;
56
use std::sync::Arc;
67

78
macro_rules! bench_func {
@@ -348,9 +349,83 @@ pub fn power_bi_sum_wrap(c: &mut Criterion) {
348349
bench_func!("power_bi_sum_wrap", get_power_bi_sum_wrap(), c);
349350
}
350351

352+
fn get_simple_long_in_expr() -> String {
353+
const N: usize = 50;
354+
let set = (1..=N).join(", ");
355+
format!("SELECT * FROM NumberCube WHERE someNumber IN ({set})")
356+
}
357+
358+
pub fn long_simple_in_expr(c: &mut Criterion) {
359+
std::env::set_var("CUBESQL_SQL_PUSH_DOWN", "true");
360+
bench_func!("long_simple_in_expr", get_simple_long_in_expr(), c);
361+
}
362+
363+
/// Returns the SQL text used by the wide `IN`-list benchmark.
///
/// The query selects `dim1`..`dim15` of `WideCube` plus `SUM(dim16)`, filters
/// with equality checks on `dim1`..`dim10`, an `= 42 OR IS NULL` check on
/// `dim11`, a 50-value `IN` list (or `IS NULL`) on `dim12`, an equality check
/// on `dim20`, and groups by `dim1`..`dim15`.
fn get_long_in_expr() -> String {
    // The literal is kept exactly as written; only the conversion to `String`
    // is spelled differently.
    String::from(
        r#"
SELECT
"WideCube"."dim1" as "column1",
"WideCube"."dim2" as "column2",
"WideCube"."dim3" as "column3",
"WideCube"."dim4" as "column4",
"WideCube"."dim5" as "column5",
"WideCube"."dim6" as "column6",
"WideCube"."dim7" as "column7",
"WideCube"."dim8" as "column8",
"WideCube"."dim9" as "column9",
"WideCube"."dim10" as "column10",
"WideCube"."dim11" as "column11",
"WideCube"."dim12" as "column12",
"WideCube"."dim13" as "column13",
"WideCube"."dim14" as "column14",
"WideCube"."dim15" as "column15",
SUM("WideCube"."dim16") as "some_sum"
FROM
"WideCube"
WHERE
"WideCube"."dim1" = 1
AND "WideCube"."dim2" = 2
AND "WideCube"."dim3" = 3
AND "WideCube"."dim4" = 4
AND "WideCube"."dim5" = 5
AND "WideCube"."dim6" = 6
AND "WideCube"."dim7" = 7
AND "WideCube"."dim8" = 8
AND "WideCube"."dim9" = 9
AND "WideCube"."dim10" = 10
AND ("WideCube"."dim11" = 42 OR "WideCube"."dim11" IS NULL)
AND (
"WideCube"."dim12" IN (
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50
) OR "WideCube"."dim12" IS NULL
) AND "WideCube"."dim20" = 55
GROUP BY
"WideCube"."dim1",
"WideCube"."dim2",
"WideCube"."dim3",
"WideCube"."dim4",
"WideCube"."dim5",
"WideCube"."dim6",
"WideCube"."dim7",
"WideCube"."dim8",
"WideCube"."dim9",
"WideCube"."dim10",
"WideCube"."dim11",
"WideCube"."dim12",
"WideCube"."dim13",
"WideCube"."dim14",
"WideCube"."dim15"
"#,
    )
}
420+
421+
pub fn long_in_expr(c: &mut Criterion) {
422+
std::env::set_var("CUBESQL_SQL_PUSH_DOWN", "true");
423+
bench_func!("long_in_expr", get_long_in_expr(), c);
424+
}
425+
351426
criterion_group! {
352427
name = benches;
353428
config = Criterion::default().measurement_time(std::time::Duration::from_secs(15)).sample_size(10);
354-
targets = split_query, split_query_count_distinct, wrapped_query, power_bi_wrap, power_bi_sum_wrap
429+
targets = split_query, split_query_count_distinct, wrapped_query, power_bi_wrap, power_bi_sum_wrap, long_in_expr, long_simple_in_expr
355430
}
356431
criterion_main!(benches);

rust/cubesql/cubesql/src/compile/mod.rs

Lines changed: 49 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -19422,6 +19422,9 @@ ORDER BY "source"."str0" ASC
1942219422

1942319423
#[tokio::test]
1942419424
async fn test_thoughtspot_where_binary_in_true_false() {
19425+
if !Rewriter::sql_push_down_enabled() {
19426+
return;
19427+
}
1942519428
init_logger();
1942619429

1942719430
let logical_plan = convert_select_to_query_plan(
@@ -19453,88 +19456,17 @@ ORDER BY "source"."str0" ASC
1945319456
.await
1945419457
.as_logical_plan();
1945519458

19456-
assert_eq!(
19457-
logical_plan.find_cube_scan().request,
19458-
V1LoadRequestQuery {
19459-
measures: Some(vec!["KibanaSampleDataEcommerce.count".to_string()]),
19460-
dimensions: Some(vec!["KibanaSampleDataEcommerce.customer_gender".to_string()]),
19461-
segments: Some(vec![]),
19462-
time_dimensions: None,
19463-
order: None,
19464-
limit: None,
19465-
offset: None,
19466-
filters: Some(vec![
19467-
V1LoadRequestQueryFilterItem {
19468-
member: None,
19469-
operator: None,
19470-
values: None,
19471-
or: Some(vec![
19472-
json!(V1LoadRequestQueryFilterItem {
19473-
member: None,
19474-
operator: None,
19475-
values: None,
19476-
or: None,
19477-
and: Some(vec![
19478-
json!(V1LoadRequestQueryFilterItem {
19479-
member: Some(
19480-
"KibanaSampleDataEcommerce.customer_gender".to_string()
19481-
),
19482-
operator: Some("startsWith".to_string()),
19483-
values: Some(vec!["female".to_string()]),
19484-
or: None,
19485-
and: None,
19486-
}),
19487-
json!(V1LoadRequestQueryFilterItem {
19488-
member: Some(
19489-
"KibanaSampleDataEcommerce.customer_gender".to_string()
19490-
),
19491-
operator: Some("endsWith".to_string()),
19492-
values: Some(vec!["female".to_string()]),
19493-
or: None,
19494-
and: None,
19495-
}),
19496-
]),
19497-
}),
19498-
json!(V1LoadRequestQueryFilterItem {
19499-
member: None,
19500-
operator: None,
19501-
values: None,
19502-
or: None,
19503-
and: Some(vec![
19504-
json!(V1LoadRequestQueryFilterItem {
19505-
member: Some(
19506-
"KibanaSampleDataEcommerce.customer_gender".to_string()
19507-
),
19508-
operator: Some("startsWith".to_string()),
19509-
values: Some(vec!["male".to_string()]),
19510-
or: None,
19511-
and: None,
19512-
}),
19513-
json!(V1LoadRequestQueryFilterItem {
19514-
member: Some(
19515-
"KibanaSampleDataEcommerce.customer_gender".to_string()
19516-
),
19517-
operator: Some("endsWith".to_string()),
19518-
values: Some(vec!["male".to_string()]),
19519-
or: None,
19520-
and: None,
19521-
}),
19522-
]),
19523-
}),
19524-
]),
19525-
and: None,
19526-
},
19527-
V1LoadRequestQueryFilterItem {
19528-
member: Some("KibanaSampleDataEcommerce.customer_gender".to_string()),
19529-
operator: Some("set".to_string()),
19530-
values: None,
19531-
or: None,
19532-
and: None,
19533-
},
19534-
]),
19535-
ungrouped: None,
19536-
}
19537-
)
19459+
// check if contains `(LOWER(..) = .. OR ..LOWER(..) = ..) IN (TRUE, FALSE)`
19460+
let re = Regex::new(r"\(LOWER ?\(.+\) = .+ OR .+LOWER ?\(.+\) = .+\) IN \(TRUE, FALSE\)")
19461+
.unwrap();
19462+
19463+
let sql = logical_plan
19464+
.find_cube_scan_wrapper()
19465+
.wrapped_sql
19466+
.unwrap()
19467+
.sql;
19468+
19469+
assert!(re.is_match(&sql));
1953819470
}
1953919471

1954019472
#[tokio::test]
@@ -22901,4 +22833,39 @@ LIMIT {{ limit }}{% endif %}"#.to_string(),
2290122833
displayable(physical_plan.as_ref()).indent()
2290222834
);
2290322835
}
22836+
22837+
#[tokio::test]
22838+
async fn test_long_in_expr() {
22839+
if !Rewriter::sql_push_down_enabled() {
22840+
return;
22841+
}
22842+
22843+
const N: usize = 50;
22844+
let set = (1..=N).join(", ");
22845+
22846+
let query = format!("SELECT * FROM NumberCube WHERE someNumber IN ({set})");
22847+
let query_plan = convert_select_to_query_plan(query, DatabaseProtocol::PostgreSQL).await;
22848+
let logical_plan = query_plan.as_logical_plan();
22849+
22850+
assert_eq!(
22851+
logical_plan.find_cube_scan().request,
22852+
V1LoadRequestQuery {
22853+
measures: Some(vec!["NumberCube.someNumber".into()]),
22854+
dimensions: Some(vec![]),
22855+
segments: Some(vec![]),
22856+
time_dimensions: None,
22857+
order: None,
22858+
limit: None,
22859+
offset: None,
22860+
filters: Some(vec![V1LoadRequestQueryFilterItem {
22861+
member: Some("NumberCube.someNumber".into()),
22862+
operator: Some("equals".into()),
22863+
values: Some((1..=N).map(|x| x.to_string()).collect()),
22864+
or: None,
22865+
and: None
22866+
}]),
22867+
ungrouped: Some(true),
22868+
}
22869+
);
22870+
}
2290422871
}

rust/cubesql/cubesql/src/compile/rewrite/converter.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,16 @@ macro_rules! add_expr_list_node {
9696
}};
9797
}
9898

99+
// Converts each expression yielded by `$value_expr.iter()` through
// `Self::add_expr_replace_params` and adds one `LogicalPlanLanguage::$field_variant`
// node to `$graph` holding all converted children in a single vector.
// The first conversion error is propagated with `?` from the enclosing function.
macro_rules! add_expr_flat_list_node {
    ($graph:expr, $value_expr:expr, $query_params:expr, $field_variant:ident) => {{
        let mut children = Vec::new();
        for item in $value_expr.iter() {
            // Stop at the first element that fails to convert.
            children.push(Self::add_expr_replace_params(
                $graph,
                item,
                $query_params,
            )?);
        }
        $graph.add(LogicalPlanLanguage::$field_variant(children))
    }};
}
108+
99109
macro_rules! add_binary_expr_list_node {
100110
($graph:expr, $value_expr:expr, $query_params:expr, $field_variant:ident) => {{
101111
fn to_binary_tree(
@@ -421,7 +431,7 @@ impl LogicalPlanToLanguageConverter {
421431
negated,
422432
} => {
423433
let expr = Self::add_expr_replace_params(graph, expr, query_params)?;
424-
let list = add_expr_list_node!(graph, list, query_params, InListExprList);
434+
let list = add_expr_flat_list_node!(graph, list, query_params, InListExprList);
425435
let negated = add_expr_data_node!(graph, negated, InListExprNegated);
426436
graph.add(LogicalPlanLanguage::InListExpr([expr, list, negated]))
427437
}

rust/cubesql/cubesql/src/compile/rewrite/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,13 @@ fn list_expr(list_type: impl Display, list: Vec<impl Display>) -> String {
735735
current
736736
}
737737

738+
/// Renders `list` in a flat parenthesised form: `(<list_type> <item> <item> ...)`,
/// with the items separated by single spaces.
///
/// An empty `list` yields `(<list_type> )`: the space after the type name is
/// always emitted.
// NOTE(review): `allow(unused)` suggests nothing calls this yet — confirm
// whether a caller is planned or the helper can be removed.
#[allow(unused)]
fn flat_list_expr(list_type: impl Display, list: Vec<impl Display>) -> String {
    let rendered: Vec<String> = list.iter().map(|item| item.to_string()).collect();
    let items = rendered.join(" ");
    format!("({list_type} {items})")
}
744+
738745
fn udf_expr(fun_name: impl Display, args: Vec<impl Display>) -> String {
739746
udf_expr_var_arg(fun_name, list_expr("ScalarUDFExprArgs", args))
740747
}

rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs

Lines changed: 1 addition & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use crate::{
77
column_expr, cube_scan, cube_scan_filters, cube_scan_filters_empty_tail, cube_scan_members,
88
dimension_expr, expr_column_name, filter, filter_member, filter_op, filter_op_filters,
99
filter_op_filters_empty_tail, filter_replacer, filter_simplify_replacer, fun_expr,
10-
fun_expr_var_arg, inlist_expr, is_not_null_expr, is_null_expr, like_expr, limit, list_expr,
10+
fun_expr_var_arg, inlist_expr, is_not_null_expr, is_null_expr, like_expr, limit,
1111
literal_bool, literal_expr, literal_int, literal_string, measure_expr,
1212
member_name_by_alias, negative_expr, not_expr, projection, rewrite,
1313
rewriter::RewriteRules,
@@ -1674,56 +1674,6 @@ impl RewriteRules for FilterRules {
16741674
"?filter_aliases",
16751675
),
16761676
),
1677-
rewrite(
1678-
"filter-thoughtspot-lower-in-true-false",
1679-
filter_replacer(
1680-
inlist_expr(
1681-
binary_expr(
1682-
binary_expr(
1683-
fun_expr("Lower", vec![column_expr("?column")]),
1684-
"=",
1685-
literal_expr("?left_literal"),
1686-
),
1687-
"OR",
1688-
binary_expr(
1689-
fun_expr("Lower", vec![column_expr("?column")]),
1690-
"=",
1691-
literal_expr("?right_literal"),
1692-
),
1693-
),
1694-
list_expr(
1695-
"InListExprList",
1696-
vec![literal_bool(true), literal_bool(false)],
1697-
),
1698-
"InListExprNegated:false",
1699-
),
1700-
"?alias_to_cube",
1701-
"?members",
1702-
"?filter_aliases",
1703-
),
1704-
filter_replacer(
1705-
binary_expr(
1706-
binary_expr(
1707-
binary_expr(
1708-
fun_expr("Lower", vec![column_expr("?column")]),
1709-
"=",
1710-
literal_expr("?left_literal"),
1711-
),
1712-
"OR",
1713-
binary_expr(
1714-
fun_expr("Lower", vec![column_expr("?column")]),
1715-
"=",
1716-
literal_expr("?right_literal"),
1717-
),
1718-
),
1719-
"AND",
1720-
is_not_null_expr(column_expr("?column")),
1721-
),
1722-
"?alias_to_cube",
1723-
"?members",
1724-
"?filter_aliases",
1725-
),
1726-
),
17271677
transforming_rewrite(
17281678
"extract-year-equals",
17291679
filter_replacer(

0 commit comments

Comments
 (0)