diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/bucketing.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/bucketing.test.ts index b2928e7ad2e98..7d90fde7178db 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/bucketing.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/bucketing.test.ts @@ -84,6 +84,27 @@ cubes: type: string add_group_by: [orders.customerId] + - name: changeTypeComplexWithJoin + sql: > + CASE + WHEN {revenueYearAgo} IS NULL THEN 'New' + WHEN {revenue} > {revenueYearAgo} THEN 'Grow' + ELSE 'Down' + END + multi_stage: true + type: string + add_group_by: [first_date.customerId] + + - name: changeTypeConcat + sql: "CONCAT({changeTypeComplex}, '-test')" + type: string + multi_stage: true + + - name: twoDimsConcat + sql: "CONCAT({changeTypeComplex}, '-', {first_date.customerType2})" + type: string + multi_stage: true + measures: - name: count @@ -111,6 +132,55 @@ cubes: END type: string + - name: first_date + sql: > + SELECT 1 AS id, '2023-03-01T00:00:00Z'::timestamptz AS createdAt, 1 AS customerId UNION ALL + SELECT 8 AS id, '2023-09-01T00:00:00Z'::timestamptz AS createdAt, 2 AS customerId UNION ALL + SELECT 16 AS id, '2024-09-01T00:00:00Z'::timestamptz AS createdAt, 3 AS customerId UNION ALL + SELECT 23 AS id, '2025-03-01T00:00:00Z'::timestamptz AS createdAt, 4 AS customerId UNION ALL + SELECT 29 AS id, '2025-03-01T00:00:00Z'::timestamptz AS createdAt, 5 AS customerId UNION ALL + SELECT 36 AS id, '2025-09-01T00:00:00Z'::timestamptz AS createdAt, 6 AS customerId + + joins: + - name: orders + sql: "{first_date.customerId} = {orders.customerId}" + relationship: one_to_many + + dimensions: + - name: customerId + sql: customerId + type: number + + - name: createdAt + sql: createdAt + type: time + + - name: customerType + sql: > + CASE + WHEN {orders.revenue} < 10000 THEN 'Low' + WHEN {orders.revenue} < 20000 THEN 'Medium' + ELSE 'Top' + END + multi_stage: true + type: string + add_group_by: [first_date.customerId] + + - name: customerType2 + sql: > + CASE + WHEN {orders.revenue} < 3000 THEN 'Low' + ELSE 'Top' + END + multi_stage: true + type: string + add_group_by: [first_date.customerId] + + - name: customerTypeConcat + sql: "CONCAT('Customer type: ', {customerType})" + multi_stage: true + type: string + add_group_by: [first_date.customerId] `); @@ -242,6 +312,205 @@ cubes: }, ], { joinGraph, cubeEvaluator, compiler })); + it('bucketing with dimension over complex dimension', async () => dbRunner.runQueryTest({ + dimensions: ['orders.changeTypeConcat'], + measures: ['orders.revenue', 'orders.revenueYearAgo'], + timeDimensions: [ + { + dimension: 'orders.createdAt', + granularity: 'year', + dateRange: ['2024-01-02T00:00:00', '2026-01-01T00:00:00'] + } + ], + timezone: 'UTC', + order: [{ + id: 'orders.changeTypeConcat' + }, { id: 'orders.createdAt' }], + }, + [ + { + orders__change_type_concat: 'Down-test', + orders__created_at_year: '2024-01-01T00:00:00.000Z', + orders__revenue: '20400', + orders__revenue_year_ago: '22800' + }, + { + orders__change_type_concat: 'Down-test', + orders__created_at_year: '2025-01-01T00:00:00.000Z', + orders__revenue: '17800', + orders__revenue_year_ago: '20400' + }, + { + orders__change_type_concat: 'Grow-test', + orders__created_at_year: '2024-01-01T00:00:00.000Z', + orders__revenue: '11700', + orders__revenue_year_ago: '9400' + }, + { + orders__change_type_concat: 'Grow-test', + orders__created_at_year: '2025-01-01T00:00:00.000Z', + orders__revenue: '14100', + orders__revenue_year_ago: '11700' + }, + ], + { joinGraph, cubeEvaluator, compiler })); + it('bucketing with join and bucket dimension', async () => dbRunner.runQueryTest({ + dimensions: ['orders.changeTypeComplexWithJoin'], + measures: ['orders.revenue', 'orders.revenueYearAgo'], + timeDimensions: [ + { + dimension: 'orders.createdAt', + granularity: 'year', + dateRange: ['2024-01-02T00:00:00', '2026-01-01T00:00:00'] + } + ], + timezone: 'UTC', + order: [{ + id: 'orders.changeTypeComplexWithJoin' + }, { id: 'orders.createdAt' }], + }, + [ + { + orders__change_type_complex_with_join: 'Down', + orders__created_at_year: '2024-01-01T00:00:00.000Z', + orders__revenue: '20400', + orders__revenue_year_ago: '22800' + }, + { + orders__change_type_complex_with_join: 'Down', + orders__created_at_year: '2025-01-01T00:00:00.000Z', + orders__revenue: '17800', + orders__revenue_year_ago: '20400' + }, + { + orders__change_type_complex_with_join: 'Grow', + orders__created_at_year: '2024-01-01T00:00:00.000Z', + orders__revenue: '11700', + orders__revenue_year_ago: '9400' + }, + { + orders__change_type_complex_with_join: 'Grow', + orders__created_at_year: '2025-01-01T00:00:00.000Z', + orders__revenue: '14100', + orders__revenue_year_ago: '11700' + }, + ], + { joinGraph, cubeEvaluator, compiler })); + it('bucketing dim reference other cube measure', async () => dbRunner.runQueryTest({ + dimensions: ['first_date.customerType'], + measures: ['orders.revenue'], + timezone: 'UTC', + order: [{ + id: 'first_date.customerType' + }], + }, + [ + { first_date__customer_type: 'Low', orders__revenue: '8100' }, + { first_date__customer_type: 'Medium', orders__revenue: '41700' }, + { first_date__customer_type: 'Top', orders__revenue: '46400' } + ], + { joinGraph, cubeEvaluator, compiler })); + it('bucketing with two dimensions', async () => dbRunner.runQueryTest({ + dimensions: ['orders.changeTypeConcat', 'first_date.customerType2'], + measures: ['orders.revenue', 'orders.revenueYearAgo'], + timeDimensions: [ + { + dimension: 'orders.createdAt', + granularity: 'year', + dateRange: ['2024-01-02T00:00:00', '2026-01-01T00:00:00'] + } + ], + timezone: 'UTC', + order: [{ + id: 'orders.changeTypeConcat' + }, { id: 'orders.createdAt' }], + }, + [ + { + orders__change_type_concat: 'Down-test', + first_date__customer_type2: 'Top', + orders__created_at_year: '2024-01-01T00:00:00.000Z', + orders__revenue: '20400', + orders__revenue_year_ago: '22800' + }, + { + orders__change_type_concat: 'Down-test', + first_date__customer_type2: 'Top', + orders__created_at_year: '2025-01-01T00:00:00.000Z', + orders__revenue: '17800', + orders__revenue_year_ago: '20400' + }, + { + orders__change_type_concat: 'Grow-test', + first_date__customer_type2: 'Low', + orders__created_at_year: '2024-01-01T00:00:00.000Z', + orders__revenue: '2700', + orders__revenue_year_ago: '2100' + }, + { + orders__change_type_concat: 'Grow-test', + first_date__customer_type2: 'Top', + orders__created_at_year: '2024-01-01T00:00:00.000Z', + orders__revenue: '9000', + orders__revenue_year_ago: '7300' + }, + { + orders__change_type_concat: 'Grow-test', + first_date__customer_type2: 'Top', + orders__created_at_year: '2025-01-01T00:00:00.000Z', + orders__revenue: '14100', + orders__revenue_year_ago: '11700' + } + ], + { joinGraph, cubeEvaluator, compiler })); + it('bucketing with two dims concacted', async () => dbRunner.runQueryTest({ + dimensions: ['orders.twoDimsConcat'], + measures: ['orders.revenue', 'orders.revenueYearAgo'], + timeDimensions: [ + { + dimension: 'orders.createdAt', + granularity: 'year', + dateRange: ['2024-01-02T00:00:00', '2026-01-01T00:00:00'] + } + ], + timezone: 'UTC', + order: [{ + id: 'orders.twoDimsConcat' + }, { id: 'orders.createdAt' }], + }, + [ + { + orders__two_dims_concat: 'Down-Top', + orders__created_at_year: '2024-01-01T00:00:00.000Z', + orders__revenue: '20400', + orders__revenue_year_ago: '22800' + }, + { + orders__two_dims_concat: 'Down-Top', + orders__created_at_year: '2025-01-01T00:00:00.000Z', + orders__revenue: '17800', + orders__revenue_year_ago: '20400' + }, + { + orders__two_dims_concat: 'Grow-Low', + orders__created_at_year: '2024-01-01T00:00:00.000Z', + orders__revenue: '2700', + orders__revenue_year_ago: '2100' + }, + { + orders__two_dims_concat: 'Grow-Top', + orders__created_at_year: '2024-01-01T00:00:00.000Z', + orders__revenue: '9000', + orders__revenue_year_ago: '7300' + }, + { + orders__two_dims_concat: 'Grow-Top', + orders__created_at_year: '2025-01-01T00:00:00.000Z', + orders__revenue: '14100', + orders__revenue_year_ago: '11700' + } + ], + { joinGraph, cubeEvaluator, compiler })); } else { // This test is working only in tesseract test.skip('multi stage over sub query', () => { expect(1).toBe(1); }); diff --git a/rust/cubesqlplanner/cubesqlplanner/src/logical_plan/full_key_aggregate.rs b/rust/cubesqlplanner/cubesqlplanner/src/logical_plan/full_key_aggregate.rs index fc03c78f9fce5..782d7078bc8ab 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/logical_plan/full_key_aggregate.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/logical_plan/full_key_aggregate.rs @@ -96,6 +96,10 @@ impl FullKeyAggregate { pub fn multi_stage_subquery_refs(&self) -> &Vec> { &self.multi_stage_subquery_refs } + + pub fn is_empty(&self) -> bool { + self.multi_stage_subquery_refs.is_empty() && self.multiplied_measures_resolver.is_none() + } } impl LogicalNode for FullKeyAggregate { diff --git a/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/query.rs b/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/query.rs index edccabe150385..414290e0bf154 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/query.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/query.rs @@ -17,7 +17,7 @@ pub struct QueryProcessor<'a> { impl QueryProcessor<'_> { fn is_over_full_aggregated_source(&self, logical_plan: &Query) -> bool { match logical_plan.source() { - QuerySource::FullKeyAggregate(_) => true, + QuerySource::FullKeyAggregate(fk) => !fk.is_empty(), QuerySource::PreAggregation(_) => false, QuerySource::LogicalJoin(_) => false, } diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs index 3722bf9b13376..1d1382903dae6 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs @@ -294,6 +294,20 @@ impl MultiStageMemberQueryPlanner { MemberSymbol::Measure(_) => measures.push(cte_member.clone()), _ => {} } + // We add all non–multi-stage dimensions from the underlying states because + // they’re needed to join a multi-stage dimension into the measure query + let (all_dependend_dimensions, all_dependend_time_dimensions) = + self.description.collect_all_non_multi_stage_dimension()?; + dimensions.extend(all_dependend_dimensions.iter().cloned()); + time_dimensions.extend(all_dependend_time_dimensions.iter().cloned()); + dimensions = dimensions + .into_iter() + .unique_by(|d| d.full_name()) + .collect_vec(); + time_dimensions = time_dimensions + .into_iter() + .unique_by(|d| d.full_name()) + .collect_vec(); let schema = LogicalSchema::default() .set_dimensions(dimensions) diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs index 00c0e3a05fecf..4478a5c6a4c3d 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs @@ -219,7 +219,7 @@ impl MultiStageQueryPlanner { descriptions, resolved_multi_stage_dimensions, )?; - if !description.is_multi_stage_dimension() { + if !description.is_multi_stage_dimension() || member.as_dimension().is_ok() { result.push(description); } } @@ -403,17 +403,6 @@ impl MultiStageQueryPlanner { resolved_multi_stage_dimensions, )?; - // Add GROUP BY to the dimension subquery itself - // if a multi-stage dimension has the `add_group_by` field. - let self_state = - if !multi_stage_member.add_group_by_symbols().is_empty() && member.is_dimension() { - let mut self_state = state.clone_state(); - self_state.add_dimensions(multi_stage_member.add_group_by_symbols().clone()); - Rc::new(self_state) - } else { - state.clone() - }; - let alias = format!("cte_{}", descriptions.len()); MultiStageQueryDescription::new( MultiStageMember::new( @@ -422,7 +411,7 @@ impl MultiStageQueryPlanner { is_ungrupped, false, ), - self_state, + state.clone(), input, alias.clone(), ) diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/query_description.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/query_description.rs index d6db16f4f3aa7..db7de4bc17284 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/query_description.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/query_description.rs @@ -1,6 +1,8 @@ use super::{MultiStageAppliedState, MultiStageMember}; use crate::logical_plan::LogicalSchema; use crate::planner::sql_evaluator::MemberSymbol; +use cubenativeutils::CubeError; +use itertools::Itertools; use std::fmt::Debug; use std::rc::Rc; @@ -79,6 +81,56 @@ impl MultiStageQueryDescription { self.input.is_empty() } + pub fn collect_all_non_multi_stage_dimension( + &self, + ) -> Result<(Vec>, Vec>), CubeError> { + let mut dimensions = vec![]; + let mut time_dimensions = vec![]; + self.collect_all_non_multi_stage_dimension_impl(&mut dimensions, &mut time_dimensions); + let dimensions = dimensions + .into_iter() + .unique_by(|d| d.full_name()) + .filter_map(|d| match d.is_basic_dimension() { + Ok(res) => { + if res { + None + } else { + Some(Ok(d)) + } + } + Err(e) => Some(Err(e)), + }) + .collect::, _>>()?; + + let time_dimensions = time_dimensions + .into_iter() + .unique_by(|d| d.full_name()) + .filter_map(|d| match d.is_basic_dimension() { + Ok(res) => { + if res { + None + } else { + Some(Ok(d)) + } + } + Err(e) => Some(Err(e)), + }) + .collect::, _>>()?; + Ok((dimensions, time_dimensions)) + } + + fn collect_all_non_multi_stage_dimension_impl( + &self, + dimensions: &mut Vec>, + time_dimensions: &mut Vec>, + ) { + dimensions.extend(self.state.dimensions_symbols().iter().cloned()); + time_dimensions.extend(self.state.time_dimensions_symbols().iter().cloned()); + for child in self.input.iter() { + child.collect_all_non_multi_stage_dimension_impl(dimensions, time_dimensions); + } + } + pub fn is_match_member_and_state( &self, member_node: &Rc, diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/collectors/join_hints_collector.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/collectors/join_hints_collector.rs index 18060e82cb9e7..3d97d316f82dd 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/collectors/join_hints_collector.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/collectors/join_hints_collector.rs @@ -27,7 +27,20 @@ impl TraversalVisitor for JoinHintsCollector { _: &Self::State, ) -> Result, CubeError> { if node.is_multi_stage() { - //We don't add multi-stage members childs to join hints + if let Ok(dim) = node.as_dimension() { + if let Some(add_group_by) = dim.add_group_by() { + for item in add_group_by.iter() { + self.apply(item, &())?; + } + } + for (dep, path) in dim.get_dependencies_with_path().into_iter() { + if let Ok(dim) = dep.as_dimension() { + if dim.is_multi_stage() { + self.on_node_traverse(&dep, &path, &())?; + } + } + } + } return Ok(None); } diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/dimension_symbol.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/dimension_symbol.rs index 897c22bc5c890..a2aed5b7785f2 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/dimension_symbol.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/dimension_symbol.rs @@ -238,11 +238,6 @@ impl DimensionSymbol { if let Some(member_sql) = &self.longitude { member_sql.extract_symbol_deps(&mut deps); } - if let Some(add_group_by) = &self.add_group_by { - for member_sql in add_group_by { - deps.extend(member_sql.get_dependencies().into_iter()); - } - } if let Some(case) = &self.case { case.extract_symbol_deps(&mut deps); } @@ -263,11 +258,6 @@ impl DimensionSymbol { if let Some(case) = &self.case { case.extract_symbol_deps_with_path(&mut deps); } - if let Some(add_group_by) = &self.add_group_by { - for member_sql in add_group_by { - deps.extend(member_sql.get_dependencies_with_path().into_iter()); - } - } deps } diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/member_symbol.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/member_symbol.rs index 9f4f21f39bc8a..5408894fa8816 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/member_symbol.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/member_symbol.rs @@ -1,5 +1,6 @@ use cubenativeutils::CubeError; +use crate::planner::sql_evaluator::collectors::has_multi_stage_members; use crate::planner::sql_evaluator::Case; use super::{ @@ -311,6 +312,14 @@ impl MemberSymbol { } } + pub fn is_basic_dimension(self: &Rc) -> Result { + if self.as_dimension().is_ok() { + has_multi_stage_members(self, true) + } else { + Ok(false) + } + } + pub fn is_leaf(&self) -> bool { self.get_dependencies().is_empty() }