Skip to content

Commit 43429eb

Browse files
rclapp (Ryan Clapp), claude, ovr
authored
fix(schema-compiler): Use isNotDistinctFrom() for ClickHouse Tesseract multi-fact joins (#10494)
ClickHouse does not support OR/IS NULL patterns in JOIN ON clauses, which causes Tesseract multi-fact queries to fail. Add support for a function-style is_not_distinct_from expression template and register ClickHouse's isNotDistinctFrom() function as the implementation. Fixes #10493 --------- Co-authored-by: Ryan Clapp <ryan.clapp@getgarner.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: Dmitry Patsura <talk@dmtry.me>
1 parent 589d20f commit 43429eb

File tree

2 files changed

+121
-0
lines changed
  • packages/cubejs-schema-compiler/src/adapter
  • rust/cubesqlplanner/cubesqlplanner/src/planner/sql_templates

2 files changed

+121
-0
lines changed

packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ export class ClickHouseQuery extends BaseQuery {
277277
// ClickHouse intervals have a distinct type for each granularity
278278
delete templates.types.interval;
279279
delete templates.types.binary;
280+
templates.expressions.is_not_distinct_from = 'isNotDistinctFrom({{ left }}, {{ right }})';
280281
return templates;
281282
}
282283
}

rust/cubesqlplanner/cubesqlplanner/src/planner/sql_templates/plan.rs

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,12 @@ impl PlanSqlTemplates {
462462

463463
return self.binary_expr(left_column, &is_not_distinct_from_op, right_column);
464464
}
465+
if self.supports_is_not_distinct_from_expr() {
466+
return self.render.render_template(
467+
"expressions/is_not_distinct_from",
468+
context! { left => left_column.as_str(), right => right_column.as_str() },
469+
);
470+
}
465471
format!(
466472
" OR ({} AND {})",
467473
self.is_null_expr(&left_column, false)?,
@@ -486,6 +492,11 @@ impl PlanSqlTemplates {
486492
.contains_template("operators/is_not_distinct_from")
487493
}
488494

495+
/// Returns `true` when the template renderer contains an
/// `expressions/is_not_distinct_from` template.
///
/// This is the function-style counterpart of the
/// `operators/is_not_distinct_from` check: the template is rendered with
/// `left` and `right` context values rather than as a binary operator.
pub fn supports_is_not_distinct_from_expr(&self) -> bool {
496+
self.render
497+
.contains_template("expressions/is_not_distinct_from")
498+
}
499+
489500
pub fn supports_generated_time_series(
490501
&self,
491502
predifined_granularity: bool,
@@ -798,3 +809,112 @@ impl PlanSqlTemplates {
798809
)
799810
}
800811
}
812+
813+
#[cfg(test)]
814+
mod tests {
815+
use super::*;
816+
use crate::test_fixtures::cube_bridge::{MockDriverTools, MockSqlTemplatesRender};
817+
use std::collections::HashMap;
818+
819+
/// Test helper that builds a `PlanSqlTemplates` for the join-condition tests.
///
/// * With an empty `extra_templates`, the instance is backed by
///   `MockDriverTools::new()`, i.e. whatever templates that mock ships with.
/// * Otherwise the instance is backed by a hand-built template map holding
///   only `expressions/binary` and `expressions/is_null`, plus every
///   `(name, body)` pair from `extra_templates`.
///
/// Panics (via `unwrap`) if the mock renderer or `PlanSqlTemplates` cannot be
/// constructed.
fn plan_templates_with(extra_templates: Vec<(&str, &str)>) -> PlanSqlTemplates {
820+
if extra_templates.is_empty() {
821+
let driver_tools = Rc::new(MockDriverTools::new());
822+
return PlanSqlTemplates::try_new(driver_tools, false).unwrap();
823+
}
824+
// Build a minimal template set with only what join_by_dimension_conditions needs
825+
let mut t: HashMap<String, String> = HashMap::new();
826+
t.insert(
827+
"expressions/binary".to_string(),
828+
"({{ left }} {{ op }} {{ right }})".to_string(),
829+
);
830+
t.insert(
831+
"expressions/is_null".to_string(),
832+
"({{ expr }} IS {% if negate %}NOT {% endif %}NULL)".to_string(),
833+
);
834+
// Caller-supplied templates go in last, so an entry with the same key as a
// base template above replaces it.
for (k, v) in extra_templates {
835+
t.insert(k.to_string(), v.to_string());
836+
}
837+
let render = MockSqlTemplatesRender::try_new(t).unwrap();
838+
let driver_tools = Rc::new(MockDriverTools::with_sql_templates(render));
839+
PlanSqlTemplates::try_new(driver_tools, false).unwrap()
840+
}
841+
842+
/// With the default mock templates and `false` as the third argument, the
/// join condition is expected to be a plain equality.
///
/// NOTE(review): the third argument is presumably the null-check flag, judging
/// by the test names — confirm against `join_by_dimension_conditions`.
#[test]
843+
fn test_join_condition_no_null_check() {
844+
let templates = plan_templates_with(vec![]);
845+
let left = "t1.col".to_string();
846+
let right = "t2.col".to_string();
847+
848+
let result = templates
849+
.join_by_dimension_conditions(&left, &right, false)
850+
.unwrap();
851+
assert_eq!(result, "(t1.col = t2.col)");
852+
}
853+
854+
/// With the default mock templates and `true` as the third argument, the
/// join condition is expected to be the equality OR-ed with an
/// "both sides IS NULL" clause.
///
/// NOTE(review): this relies on the default `MockDriverTools` templates not
/// defining either `is_not_distinct_from` template — not visible from here.
#[test]
855+
fn test_join_condition_null_check_fallback_or_is_null() {
856+
// No is_not_distinct_from templates → falls back to OR (IS NULL AND IS NULL)
857+
let templates = plan_templates_with(vec![]);
858+
let left = "t1.col".to_string();
859+
let right = "t2.col".to_string();
860+
861+
let result = templates
862+
.join_by_dimension_conditions(&left, &right, true)
863+
.unwrap();
864+
assert_eq!(
865+
result,
866+
"(t1.col = t2.col OR ((t1.col IS NULL) AND (t2.col IS NULL)))"
867+
);
868+
}
869+
870+
/// When only the `operators/is_not_distinct_from` template is added to the
/// minimal template set, the join condition is expected to be rendered as a
/// binary expression using that operator text.
#[test]
871+
fn test_join_condition_null_check_binary_operator() {
872+
// Postgres/BigQuery/Snowflake style: binary operator IS NOT DISTINCT FROM
873+
let templates = plan_templates_with(vec![(
874+
"operators/is_not_distinct_from",
875+
"IS NOT DISTINCT FROM",
876+
)]);
877+
let left = "t1.col".to_string();
878+
let right = "t2.col".to_string();
879+
880+
let result = templates
881+
.join_by_dimension_conditions(&left, &right, true)
882+
.unwrap();
883+
assert_eq!(result, "(t1.col IS NOT DISTINCT FROM t2.col)");
884+
}
885+
886+
/// When only the `expressions/is_not_distinct_from` template is added to the
/// minimal template set, the join condition is expected to be that template
/// rendered with the two column strings as `left` and `right`.
#[test]
887+
fn test_join_condition_null_check_expression_template() {
888+
// ClickHouse style: function-call isNotDistinctFrom(left, right)
889+
let templates = plan_templates_with(vec![(
890+
"expressions/is_not_distinct_from",
891+
"isNotDistinctFrom({{ left }}, {{ right }})",
892+
)]);
893+
let left = "t1.col".to_string();
894+
let right = "t2.col".to_string();
895+
896+
let result = templates
897+
.join_by_dimension_conditions(&left, &right, true)
898+
.unwrap();
899+
assert_eq!(result, "isNotDistinctFrom(t1.col, t2.col)");
900+
}
901+
902+
/// When both the operator-style and the function-style
/// `is_not_distinct_from` templates are registered, the expected output is
/// the operator form, pinning the order in which the two are checked.
#[test]
903+
fn test_join_condition_binary_operator_takes_precedence_over_expression() {
904+
// When both templates exist, the binary operator should be used
905+
let templates = plan_templates_with(vec![
906+
("operators/is_not_distinct_from", "IS NOT DISTINCT FROM"),
907+
(
908+
"expressions/is_not_distinct_from",
909+
"isNotDistinctFrom({{ left }}, {{ right }})",
910+
),
911+
]);
912+
let left = "t1.col".to_string();
913+
let right = "t2.col".to_string();
914+
915+
let result = templates
916+
.join_by_dimension_conditions(&left, &right, true)
917+
.unwrap();
918+
assert_eq!(result, "(t1.col IS NOT DISTINCT FROM t2.col)");
919+
}
920+
}

0 commit comments

Comments
 (0)