Skip to content

Commit 2bda0dd

Browse files
authored
feat(cubesql): Support [I]LIKE ... ESCAPE ... SQL push down
1 parent 6f8b097 commit 2bda0dd

File tree

12 files changed

+332
-6
lines changed

12 files changed

+332
-6
lines changed

packages/cubejs-schema-compiler/src/adapter/BaseQuery.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3225,6 +3225,9 @@ export class BaseQuery {
32253225
not: 'NOT ({{ expr }})',
32263226
true: 'TRUE',
32273227
false: 'FALSE',
3228+
like: '{{ expr }} {% if negated %}NOT {% endif %}LIKE {{ pattern }}',
3229+
ilike: '{{ expr }} {% if negated %}NOT {% endif %}ILIKE {{ pattern }}',
3230+
like_escape: '{{ like_expr }} ESCAPE {{ escape_char }}',
32283231
},
32293232
quotes: {
32303233
identifiers: '"',

packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,8 @@ export class BigqueryQuery extends BaseQuery {
249249
templates.expressions.interval = 'INTERVAL {{ interval }}';
250250
templates.expressions.extract = 'EXTRACT({% if date_part == \'DOW\' %}DAYOFWEEK{% elif date_part == \'DOY\' %}DAYOFYEAR{% else %}{{ date_part }}{% endif %} FROM {{ expr }})';
251251
templates.expressions.timestamp_literal = 'TIMESTAMP(\'{{ value }}\')';
252+
delete templates.expressions.ilike;
253+
delete templates.expressions.like_escape;
252254
templates.types.boolean = 'BOOL';
253255
templates.types.float = 'FLOAT64';
254256
templates.types.double = 'FLOAT64';

packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,7 @@ export class ClickHouseQuery extends BaseQuery {
272272
// TODO: Introduce additional filter in jinja? or parseDateTimeBestEffort?
273273
// https://github.com/ClickHouse/ClickHouse/issues/19351
274274
templates.expressions.timestamp_literal = 'parseDateTimeBestEffort(\'{{ value }}\')';
275+
delete templates.expressions.like_escape;
275276
templates.quotes.identifiers = '`';
276277
templates.quotes.escape = '\\`';
277278
templates.types.boolean = 'BOOL';

packages/cubejs-schema-compiler/src/adapter/MssqlQuery.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ export class MssqlQuery extends BaseQuery {
223223
const templates = super.sqlTemplates();
224224
templates.functions.LEAST = 'LEAST({{ args_concat }})';
225225
templates.functions.GREATEST = 'GREATEST({{ args_concat }})';
226+
delete templates.expressions.ilike;
226227
templates.types.string = 'VARCHAR';
227228
templates.types.boolean = 'BIT';
228229
templates.types.integer = 'INT';

packages/cubejs-schema-compiler/src/adapter/MysqlQuery.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ export class MysqlQuery extends BaseQuery {
158158
const templates = super.sqlTemplates();
159159
templates.quotes.identifiers = '`';
160160
templates.quotes.escape = '\\`';
161+
delete templates.expressions.ilike;
161162
templates.types.string = 'VARCHAR';
162163
templates.types.boolean = 'TINYINT';
163164
templates.types.timestamp = 'DATETIME';

packages/cubejs-schema-compiler/src/adapter/PrestodbQuery.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ export class PrestodbQuery extends BaseQuery {
121121
templates.expressions.extract = 'EXTRACT({{ date_part }} FROM {{ expr }})';
122122
templates.expressions.interval_single_date_part = 'INTERVAL \'{{ num }}\' {{ date_part }}';
123123
templates.expressions.timestamp_literal = 'from_iso8601_timestamp(\'{{ value }}\')';
124+
delete templates.expressions.ilike;
124125
templates.types.string = 'VARCHAR';
125126
templates.types.float = 'REAL';
126127
// Presto intervals have a YearMonth or DayTime type variants, but no universal type

rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs

Lines changed: 91 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use crate::{
77
filters::Decimal,
88
utils::{DecomposedDayTime, DecomposedMonthDayNano},
99
},
10-
WrappedSelectType,
10+
LikeType, WrappedSelectType,
1111
},
1212
},
1313
config::ConfigObj,
@@ -1285,8 +1285,96 @@ impl CubeScanWrapperNode {
12851285
Ok((resulting_sql, sql_query))
12861286
}
12871287
// Expr::AnyExpr { .. } => {}
1288-
// Expr::Like(_) => {}
1289-
// Expr::ILike(_) => {}
1288+
Expr::Like(like) => {
1289+
let (expr, sql_query) = Self::generate_sql_for_expr(
1290+
plan.clone(),
1291+
sql_query,
1292+
sql_generator.clone(),
1293+
*like.expr,
1294+
ungrouped_scan_node.clone(),
1295+
subqueries.clone(),
1296+
)
1297+
.await?;
1298+
let (pattern, sql_query) = Self::generate_sql_for_expr(
1299+
plan.clone(),
1300+
sql_query,
1301+
sql_generator.clone(),
1302+
*like.pattern,
1303+
ungrouped_scan_node.clone(),
1304+
subqueries.clone(),
1305+
)
1306+
.await?;
1307+
let (escape_char, sql_query) = match like.escape_char {
1308+
Some(escape_char) => {
1309+
let (escape_char, sql_query) = Self::generate_sql_for_expr(
1310+
plan.clone(),
1311+
sql_query,
1312+
sql_generator.clone(),
1313+
Expr::Literal(ScalarValue::Utf8(Some(escape_char.to_string()))),
1314+
ungrouped_scan_node.clone(),
1315+
subqueries.clone(),
1316+
)
1317+
.await?;
1318+
(Some(escape_char), sql_query)
1319+
}
1320+
None => (None, sql_query),
1321+
};
1322+
let resulting_sql = sql_generator
1323+
.get_sql_templates()
1324+
.like_expr(LikeType::Like, expr, like.negated, pattern, escape_char)
1325+
.map_err(|e| {
1326+
DataFusionError::Internal(format!(
1327+
"Can't generate SQL for like expr: {}",
1328+
e
1329+
))
1330+
})?;
1331+
Ok((resulting_sql, sql_query))
1332+
}
1333+
Expr::ILike(ilike) => {
1334+
let (expr, sql_query) = Self::generate_sql_for_expr(
1335+
plan.clone(),
1336+
sql_query,
1337+
sql_generator.clone(),
1338+
*ilike.expr,
1339+
ungrouped_scan_node.clone(),
1340+
subqueries.clone(),
1341+
)
1342+
.await?;
1343+
let (pattern, sql_query) = Self::generate_sql_for_expr(
1344+
plan.clone(),
1345+
sql_query,
1346+
sql_generator.clone(),
1347+
*ilike.pattern,
1348+
ungrouped_scan_node.clone(),
1349+
subqueries.clone(),
1350+
)
1351+
.await?;
1352+
let (escape_char, sql_query) = match ilike.escape_char {
1353+
Some(escape_char) => {
1354+
let (escape_char, sql_query) = Self::generate_sql_for_expr(
1355+
plan.clone(),
1356+
sql_query,
1357+
sql_generator.clone(),
1358+
Expr::Literal(ScalarValue::Utf8(Some(escape_char.to_string()))),
1359+
ungrouped_scan_node.clone(),
1360+
subqueries.clone(),
1361+
)
1362+
.await?;
1363+
(Some(escape_char), sql_query)
1364+
}
1365+
None => (None, sql_query),
1366+
};
1367+
let resulting_sql = sql_generator
1368+
.get_sql_templates()
1369+
.like_expr(LikeType::ILike, expr, ilike.negated, pattern, escape_char)
1370+
.map_err(|e| {
1371+
DataFusionError::Internal(format!(
1372+
"Can't generate SQL for ilike expr: {}",
1373+
e
1374+
))
1375+
})?;
1376+
Ok((resulting_sql, sql_query))
1377+
}
12901378
// Expr::SimilarTo(_) => {}
12911379
Expr::Not(expr) => {
12921380
let (expr, sql_query) = Self::generate_sql_for_expr(

rust/cubesql/cubesql/src/compile/mod.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18440,4 +18440,56 @@ LIMIT {{ limit }}{% endif %}"#.to_string(),
1844018440

1844118441
Ok(())
1844218442
}
18443+
18444+
#[tokio::test]
18445+
async fn test_thoughtspot_like_escape_push_down() {
18446+
if !Rewriter::sql_push_down_enabled() {
18447+
return;
18448+
}
18449+
init_testing_logger();
18450+
18451+
let query_plan = convert_select_to_query_plan(
18452+
r#"
18453+
SELECT CAST("customer_gender" AS TEXT) AS "customer_gender"
18454+
FROM "public"."KibanaSampleDataEcommerce"
18455+
WHERE
18456+
"customer_gender" LIKE (
18457+
'%' || replace(
18458+
replace(
18459+
replace(
18460+
'ale',
18461+
'!',
18462+
'!!'
18463+
),
18464+
'%',
18465+
'!%'
18466+
),
18467+
'_',
18468+
'!_'
18469+
) || '%'
18470+
) ESCAPE '!'
18471+
GROUP BY 1
18472+
ORDER BY 1
18473+
LIMIT 100
18474+
"#
18475+
.to_string(),
18476+
DatabaseProtocol::PostgreSQL,
18477+
)
18478+
.await;
18479+
18480+
let logical_plan = query_plan.as_logical_plan();
18481+
let sql = logical_plan
18482+
.find_cube_scan_wrapper()
18483+
.wrapped_sql
18484+
.unwrap()
18485+
.sql;
18486+
assert!(sql.contains("LIKE "));
18487+
assert!(sql.contains("ESCAPE "));
18488+
18489+
let physical_plan = query_plan.as_physical_plan().await.unwrap();
18490+
println!(
18491+
"Physical plan: {}",
18492+
displayable(physical_plan.as_ref()).indent()
18493+
);
18494+
}
1844318495
}
rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/like_expr.rs

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
use crate::{
2+
compile::rewrite::{
3+
analysis::LogicalPlanAnalysis, like_expr, rewrite, rules::wrapper::WrapperRules,
4+
transforming_rewrite, wrapper_pullup_replacer, wrapper_pushdown_replacer,
5+
LikeExprEscapeChar, LikeExprLikeType, LikeType, LogicalPlanLanguage,
6+
WrapperPullupReplacerAliasToCube,
7+
},
8+
var, var_iter,
9+
};
10+
use egg::{EGraph, Rewrite, Subst};
11+
12+
impl WrapperRules {
13+
pub fn like_expr_rules(
14+
&self,
15+
rules: &mut Vec<Rewrite<LogicalPlanLanguage, LogicalPlanAnalysis>>,
16+
) {
17+
rules.extend(vec![
18+
rewrite(
19+
"wrapper-push-down-like-expr",
20+
wrapper_pushdown_replacer(
21+
like_expr(
22+
"?like_type",
23+
"?negated",
24+
"?expr",
25+
"?pattern",
26+
"?escape_char",
27+
),
28+
"?alias_to_cube",
29+
"?ungrouped",
30+
"?in_projection",
31+
"?cube_members",
32+
),
33+
like_expr(
34+
"?like_type",
35+
"?negated",
36+
wrapper_pushdown_replacer(
37+
"?expr",
38+
"?alias_to_cube",
39+
"?ungrouped",
40+
"?in_projection",
41+
"?cube_members",
42+
),
43+
wrapper_pushdown_replacer(
44+
"?pattern",
45+
"?alias_to_cube",
46+
"?ungrouped",
47+
"?in_projection",
48+
"?cube_members",
49+
),
50+
"?escape_char",
51+
),
52+
),
53+
transforming_rewrite(
54+
"wrapper-pull-up-like-expr",
55+
like_expr(
56+
"?like_type",
57+
"?negated",
58+
wrapper_pullup_replacer(
59+
"?expr",
60+
"?alias_to_cube",
61+
"?ungrouped",
62+
"?in_projection",
63+
"?cube_members",
64+
),
65+
wrapper_pullup_replacer(
66+
"?pattern",
67+
"?alias_to_cube",
68+
"?ungrouped",
69+
"?in_projection",
70+
"?cube_members",
71+
),
72+
"?escape_char",
73+
),
74+
wrapper_pullup_replacer(
75+
like_expr(
76+
"?like_type",
77+
"?negated",
78+
"?expr",
79+
"?pattern",
80+
"?escape_char",
81+
),
82+
"?alias_to_cube",
83+
"?ungrouped",
84+
"?in_projection",
85+
"?cube_members",
86+
),
87+
self.transform_like_expr("?alias_to_cube", "?like_type", "?escape_char"),
88+
),
89+
]);
90+
}
91+
92+
fn transform_like_expr(
93+
&self,
94+
alias_to_cube_var: &'static str,
95+
like_type_var: &'static str,
96+
escape_char_var: &'static str,
97+
) -> impl Fn(&mut EGraph<LogicalPlanLanguage, LogicalPlanAnalysis>, &mut Subst) -> bool {
98+
let alias_to_cube_var = var!(alias_to_cube_var);
99+
let like_type_var = var!(like_type_var);
100+
let escape_char_var = var!(escape_char_var);
101+
let meta = self.meta_context.clone();
102+
move |egraph, subst| {
103+
for alias_to_cube in var_iter!(
104+
egraph[subst[alias_to_cube_var]],
105+
WrapperPullupReplacerAliasToCube
106+
) {
107+
let Some(sql_generator) = meta.sql_generator_by_alias_to_cube(&alias_to_cube)
108+
else {
109+
continue;
110+
};
111+
112+
let templates = &sql_generator.get_sql_templates().templates;
113+
114+
for escape_char in var_iter!(egraph[subst[escape_char_var]], LikeExprEscapeChar) {
115+
if escape_char.is_some() {
116+
if !templates.contains_key("expressions/like_escape") {
117+
continue;
118+
}
119+
}
120+
121+
for like_type in var_iter!(egraph[subst[like_type_var]], LikeExprLikeType) {
122+
let expression_name = match like_type {
123+
LikeType::Like => "like",
124+
LikeType::ILike => "ilike",
125+
_ => continue,
126+
};
127+
if templates.contains_key(&format!("expressions/{}", expression_name)) {
128+
return true;
129+
}
130+
}
131+
}
132+
}
133+
false
134+
}
135+
}
136+
}

rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ mod filter;
1212
mod in_list_expr;
1313
mod in_subquery_expr;
1414
mod is_null_expr;
15+
mod like_expr;
1516
mod limit;
1617
mod literal;
1718
mod negative_expr;
@@ -82,6 +83,7 @@ impl RewriteRules for WrapperRules {
8283
self.negative_expr_rules(&mut rules);
8384
self.not_expr_rules(&mut rules);
8485
self.distinct_rules(&mut rules);
86+
self.like_expr_rules(&mut rules);
8587

8688
rules
8789
}

0 commit comments

Comments
 (0)