From bd1e04a0d48fdd91efb778e8eceb667481cac6ae Mon Sep 17 00:00:00 2001 From: Alex Qyoun-ae <4062971+MazterQyou@users.noreply.github.com> Date: Tue, 8 Jul 2025 00:48:55 +0400 Subject: [PATCH] fix(cubesql): Normalize `EXTRACT`/`DATE_TRUNC` granularities Signed-off-by: Alex Qyoun-ae <4062971+MazterQyou@users.noreply.github.com> --- .../src/compile/engine/df/optimizers/mod.rs | 2 + .../engine/df/optimizers/plan_normalize.rs | 1086 +++++++++++++++++ rust/cubesql/cubesql/src/compile/mod.rs | 14 +- .../cubesql/src/compile/query_engine.rs | 7 +- .../src/compile/rewrite/rules/dates.rs | 2 +- .../src/compile/rewrite/rules/filters.rs | 30 +- .../src/compile/rewrite/rules/old_split.rs | 122 +- .../rewrite/rules/split/granularity.rs | 28 +- rust/cubesql/cubesql/src/compile/router.rs | 6 +- ...ile__tests__test_extract_string_field.snap | 4 +- .../cubesql/src/compile/test/test_wrapper.rs | 2 +- rust/cubesql/cubesql/src/sql/statement.rs | 66 +- 12 files changed, 1183 insertions(+), 186 deletions(-) create mode 100644 rust/cubesql/cubesql/src/compile/engine/df/optimizers/plan_normalize.rs diff --git a/rust/cubesql/cubesql/src/compile/engine/df/optimizers/mod.rs b/rust/cubesql/cubesql/src/compile/engine/df/optimizers/mod.rs index b25aca3d15a1c..d480a5b291407 100644 --- a/rust/cubesql/cubesql/src/compile/engine/df/optimizers/mod.rs +++ b/rust/cubesql/cubesql/src/compile/engine/df/optimizers/mod.rs @@ -3,9 +3,11 @@ pub mod utils; mod filter_push_down; mod filter_split_meta; mod limit_push_down; +mod plan_normalize; mod sort_push_down; pub use filter_push_down::FilterPushDown; pub use filter_split_meta::FilterSplitMeta; pub use limit_push_down::LimitPushDown; +pub use plan_normalize::PlanNormalize; pub use sort_push_down::SortPushDown; diff --git a/rust/cubesql/cubesql/src/compile/engine/df/optimizers/plan_normalize.rs b/rust/cubesql/cubesql/src/compile/engine/df/optimizers/plan_normalize.rs new file mode 100644 index 0000000000000..57c7c97da72c7 --- /dev/null +++ b/rust/cubesql/cubesql/src/compile/engine/df/optimizers/plan_normalize.rs @@ -0,0 +1,1086 @@ +use std::{collections::HashMap, sync::Arc}; + +use datafusion::{ + error::{DataFusionError, Result}, + logical_expr::{BuiltinScalarFunction, Expr, GroupingSet, Like}, + logical_plan::{ + build_join_schema, build_table_udf_schema, + plan::{ + Aggregate, Analyze, CreateMemoryTable, CrossJoin, Distinct, Explain, Filter, Join, + Limit, Partitioning, Projection, Repartition, Sort, Subquery, TableScan, TableUDFs, + Union, Values, Window, + }, + union_with_alias, Column, DFSchema, LogicalPlan, LogicalPlanBuilder, + }, + optimizer::optimizer::{OptimizerConfig, OptimizerRule}, + scalar::ScalarValue, +}; + +use crate::compile::rewrite::rules::utils::DatePartToken; + +/// PlanNormalize optimizer rule walks through the query and applies transformations +/// to normalize the logical plan structure and expressions. +/// +/// Currently this includes replacing literal granularities in `DatePart` and `DateTrunc` functions +/// with their normalized equivalents. +pub struct PlanNormalize {} + +impl PlanNormalize { + pub fn new() -> Self { + Self {} + } +} + +impl OptimizerRule for PlanNormalize { + fn optimize( + &self, + plan: &LogicalPlan, + optimizer_config: &OptimizerConfig, + ) -> Result { + plan_normalize(self, plan, &mut HashMap::new(), optimizer_config) + } + + fn name(&self) -> &str { + "__cube__plan_normalize" + } +} + +/// Recursively optimizes the logical plan, searching for logical plan nodes +/// and expressions that can be normalized. +/// +/// `remapped_columns` passed to the function is assumed to be empty unless stated otherwise. +fn plan_normalize( + optimizer: &PlanNormalize, + plan: &LogicalPlan, + remapped_columns: &mut HashMap, + optimizer_config: &OptimizerConfig, +) -> Result { + match plan { + LogicalPlan::Projection(Projection { + expr, + input, + schema: _, + alias, + }) => { + let input = plan_normalize(optimizer, input, remapped_columns, optimizer_config)?; + let new_expr = expr + .iter() + .map(|expr| expr_normalize(optimizer, expr, remapped_columns, optimizer_config)) + .collect::>>()?; + let alias = alias.clone(); + + *remapped_columns = HashMap::new(); + for (expr, new_expr) in expr.iter().zip(new_expr.iter()) { + let old_name = expr.name(&DFSchema::empty())?; + let new_name = new_expr.name(&DFSchema::empty())?; + if old_name != new_name { + let old_column = Column { + relation: alias.clone(), + name: old_name, + }; + let new_column = Column { + relation: alias.clone(), + name: new_name, + }; + remapped_columns.insert(old_column, new_column); + } + } + + LogicalPlanBuilder::from(input) + .project_with_alias(new_expr, alias)? + .build() + } + + LogicalPlan::Filter(Filter { predicate, input }) => { + let input = plan_normalize(optimizer, input, remapped_columns, optimizer_config)?; + let predicate = + expr_normalize(optimizer, predicate, remapped_columns, optimizer_config)?; + + LogicalPlanBuilder::from(input).filter(predicate)?.build() + } + + LogicalPlan::Window(Window { + input, + window_expr, + schema: _, + }) => { + let input = plan_normalize(optimizer, input, remapped_columns, optimizer_config)?; + let new_window_expr = window_expr + .iter() + .map(|expr| expr_normalize(optimizer, expr, remapped_columns, optimizer_config)) + .collect::>>()?; + + for (window_expr, new_window_expr) in window_expr.iter().zip(new_window_expr.iter()) { + let old_name = window_expr.name(&DFSchema::empty())?; + let new_name = new_window_expr.name(&DFSchema::empty())?; + if old_name != new_name { + let old_column = Column::from_name(old_name); + let new_column = Column::from_name(new_name); + remapped_columns.insert(old_column, new_column); + } + } + + LogicalPlanBuilder::from(input) + .window(new_window_expr)? + .build() + } + + LogicalPlan::Aggregate(Aggregate { + input, + group_expr, + aggr_expr, + schema: _, + }) => { + let input = plan_normalize(optimizer, input, remapped_columns, optimizer_config)?; + let new_group_expr = group_expr + .iter() + .map(|expr| expr_normalize(optimizer, expr, remapped_columns, optimizer_config)) + .collect::>>()?; + let new_aggr_expr = aggr_expr + .iter() + .map(|expr| expr_normalize(optimizer, expr, remapped_columns, optimizer_config)) + .collect::>>()?; + + *remapped_columns = HashMap::new(); + for (group_expr, new_group_expr) in group_expr.iter().zip(new_group_expr.iter()) { + let old_name = group_expr.name(&DFSchema::empty())?; + let new_name = new_group_expr.name(&DFSchema::empty())?; + if old_name != new_name { + let old_column = Column::from_name(old_name); + let new_column = Column::from_name(new_name); + remapped_columns.insert(old_column, new_column); + } + } + for (aggr_expr, new_aggr_expr) in aggr_expr.iter().zip(new_aggr_expr.iter()) { + let old_name = aggr_expr.name(&DFSchema::empty())?; + let new_name = new_aggr_expr.name(&DFSchema::empty())?; + if old_name != new_name { + let old_column = Column::from_name(old_name); + let new_column = Column::from_name(new_name); + remapped_columns.insert(old_column, new_column); + } + } + + LogicalPlanBuilder::from(input) + .aggregate(new_group_expr, new_aggr_expr)? + .build() + } + + LogicalPlan::Sort(Sort { expr, input }) => { + let expr = expr + .iter() + .map(|expr| expr_normalize(optimizer, expr, remapped_columns, optimizer_config)) + .collect::>>()?; + let input = plan_normalize(optimizer, input, remapped_columns, optimizer_config)?; + + LogicalPlanBuilder::from(input).sort(expr)?.build() + } + + LogicalPlan::Join(Join { + left, + right, + on, + join_type, + join_constraint, + schema: _, + null_equals_null, + }) => { + let mut right_remapped_columns = HashMap::new(); + let left = Arc::new(plan_normalize( + optimizer, + left, + remapped_columns, + optimizer_config, + )?); + let right = Arc::new(plan_normalize( + optimizer, + right, + &mut right_remapped_columns, + optimizer_config, + )?); + let on = on + .iter() + .map(|(left_column, right_column)| { + let left_column = column_normalize( + optimizer, + left_column, + remapped_columns, + optimizer_config, + )?; + let right_column = column_normalize( + optimizer, + right_column, + &right_remapped_columns, + optimizer_config, + )?; + Ok((left_column, right_column)) + }) + .collect::>>()?; + let join_type = *join_type; + let join_constraint = *join_constraint; + let schema = Arc::new(build_join_schema( + &left.schema(), + &right.schema(), + &join_type, + )?); + let null_equals_null = *null_equals_null; + + remapped_columns.extend(right_remapped_columns); + + Ok(LogicalPlan::Join(Join { + left, + right, + on, + join_type, + join_constraint, + schema, + null_equals_null, + })) + } + + LogicalPlan::CrossJoin(CrossJoin { + left, + right, + schema: _, + }) => { + let mut right_remapped_columns = HashMap::new(); + let left = plan_normalize(optimizer, left, remapped_columns, optimizer_config)?; + let right = plan_normalize( + optimizer, + right, + &mut right_remapped_columns, + optimizer_config, + )?; + + remapped_columns.extend(right_remapped_columns); + + LogicalPlanBuilder::from(left).cross_join(&right)?.build() + } + + LogicalPlan::Repartition(Repartition { + input, + partitioning_scheme, + }) => { + let input = plan_normalize(optimizer, input, remapped_columns, optimizer_config)?; + let partitioning_scheme = match partitioning_scheme { + Partitioning::RoundRobinBatch(n) => Partitioning::RoundRobinBatch(*n), + Partitioning::Hash(exprs, n) => { + let exprs = exprs + .iter() + .map(|expr| { + expr_normalize(optimizer, expr, remapped_columns, optimizer_config) + }) + .collect::>>()?; + Partitioning::Hash(exprs, *n) + } + }; + + LogicalPlanBuilder::from(input) + .repartition(partitioning_scheme)? + .build() + } + + LogicalPlan::Union(Union { + inputs, + schema: _, + alias, + }) => { + let mut plan = None; + for input in inputs { + let mut new_remapped_columns = HashMap::new(); + let input = plan_normalize( + optimizer, + input, + &mut new_remapped_columns, + optimizer_config, + )?; + if let Some(last_plan) = plan.take() { + plan = Some(union_with_alias(last_plan, input, alias.clone())?); + } else { + plan = Some(input); + *remapped_columns = new_remapped_columns; + } + } + + plan.ok_or_else(|| { + DataFusionError::Internal("Found UNION plan with no inputs".to_string()) + }) + } + + LogicalPlan::TableScan(TableScan { + table_name, + source, + projection, + projected_schema, + filters, + fetch, + }) => { + let table_name = table_name.clone(); + let source = Arc::clone(source); + let projection = projection.clone(); + let projected_schema = Arc::clone(projected_schema); + let filters = filters + .iter() + .map(|expr| expr_normalize(optimizer, expr, remapped_columns, optimizer_config)) + .collect::>>()?; + let fetch = *fetch; + + Ok(LogicalPlan::TableScan(TableScan { + table_name, + source, + projection, + projected_schema, + filters, + fetch, + })) + } + + p @ LogicalPlan::EmptyRelation(_) => Ok(p.clone()), + + LogicalPlan::Limit(Limit { skip, fetch, input }) => { + let skip = *skip; + let fetch = *fetch; + let input = plan_normalize(optimizer, input, remapped_columns, optimizer_config)?; + + LogicalPlanBuilder::from(input).limit(skip, fetch)?.build() + } + + LogicalPlan::Subquery(Subquery { + input, + subqueries, + types, + schema: _, + }) => { + let input = plan_normalize(optimizer, input, remapped_columns, optimizer_config)?; + let mut new_subqueries = Vec::with_capacity(subqueries.len()); + for subquery in subqueries { + let mut subquery_remapped_columns = HashMap::new(); + let new_subquery = plan_normalize( + optimizer, + subquery, + &mut subquery_remapped_columns, + optimizer_config, + )?; + new_subqueries.push(new_subquery); + remapped_columns.extend(subquery_remapped_columns); + } + let types = types.clone(); + + LogicalPlanBuilder::from(input) + .subquery(new_subqueries, types)? + .build() + } + + p @ LogicalPlan::CreateExternalTable(_) => Ok(p.clone()), + + LogicalPlan::CreateMemoryTable(CreateMemoryTable { name, input }) => { + let name = name.clone(); + let input = Arc::new(plan_normalize( + optimizer, + input, + remapped_columns, + optimizer_config, + )?); + Ok(LogicalPlan::CreateMemoryTable(CreateMemoryTable { + name, + input, + })) + } + + p @ LogicalPlan::CreateCatalogSchema(_) => Ok(p.clone()), + + p @ LogicalPlan::DropTable(_) => Ok(p.clone()), + + LogicalPlan::Values(Values { schema: _, values }) => { + let values = values + .iter() + .map(|row| { + row.iter() + .map(|expr| { + expr_normalize(optimizer, expr, remapped_columns, optimizer_config) + }) + .collect::>>() + }) + .collect::>>()?; + + LogicalPlanBuilder::values(values)?.build() + } + + LogicalPlan::Explain(Explain { + verbose, + plan, + stringified_plans: _, + schema: _, + }) => { + let verbose = *verbose; + let plan = plan_normalize(optimizer, plan, remapped_columns, optimizer_config)?; + + *remapped_columns = HashMap::new(); + + LogicalPlanBuilder::from(plan) + .explain(verbose, false)? + .build() + } + + LogicalPlan::Analyze(Analyze { + verbose, + input, + schema: _, + }) => { + let verbose = *verbose; + let input = plan_normalize(optimizer, input, remapped_columns, optimizer_config)?; + + *remapped_columns = HashMap::new(); + + LogicalPlanBuilder::from(input) + .explain(verbose, true)? + .build() + } + + LogicalPlan::TableUDFs(TableUDFs { + expr, + input, + schema: _, + }) => { + let input = Arc::new(plan_normalize( + optimizer, + input, + remapped_columns, + optimizer_config, + )?); + let new_expr = expr + .iter() + .map(|expr| expr_normalize(optimizer, expr, remapped_columns, optimizer_config)) + .collect::>>()?; + let schema = build_table_udf_schema(&input, &new_expr)?; + + for (expr, new_expr) in expr.iter().zip(new_expr.iter()) { + let old_name = expr.name(&DFSchema::empty())?; + let new_name = new_expr.name(&DFSchema::empty())?; + if old_name != new_name { + let old_column = Column::from_name(old_name); + let new_column = Column::from_name(new_name); + remapped_columns.insert(old_column, new_column); + } + } + + Ok(LogicalPlan::TableUDFs(TableUDFs { + expr: new_expr, + input, + schema, + })) + } + + p @ LogicalPlan::Extension(_) => { + // TODO: we don't know how to optimize generic `Extension` node, + // but we might need this if we implement our own `Extension` nodes + // that might appear in the **initial** plan. + // Let's clean remapped columns to be sure though. + *remapped_columns = HashMap::new(); + Ok(p.clone()) + } + + LogicalPlan::Distinct(Distinct { input }) => { + let input = plan_normalize(optimizer, input, remapped_columns, optimizer_config)?; + + LogicalPlanBuilder::from(input).distinct()?.build() + } + } +} + +/// Recursively normalizes expressions. +fn expr_normalize( + optimizer: &PlanNormalize, + expr: &Expr, + remapped_columns: &HashMap, + optimizer_config: &OptimizerConfig, +) -> Result { + match expr { + Expr::Alias(expr, alias) => { + let expr = Box::new(expr_normalize( + optimizer, + expr, + remapped_columns, + optimizer_config, + )?); + let alias = alias.clone(); + Ok(Expr::Alias(expr, alias)) + } + + Expr::OuterColumn(data_type, column) => { + let data_type = data_type.clone(); + let column = column_normalize(optimizer, column, remapped_columns, optimizer_config)?; + Ok(Expr::OuterColumn(data_type, column)) + } + + Expr::Column(column) => { + let column = column_normalize(optimizer, column, remapped_columns, optimizer_config)?; + Ok(Expr::Column(column)) + } + + e @ Expr::ScalarVariable(..) => Ok(e.clone()), + + e @ Expr::Literal(..) => Ok(e.clone()), + + Expr::BinaryExpr { left, op, right } => { + let left = Box::new(expr_normalize( + optimizer, + left, + remapped_columns, + optimizer_config, + )?); + let op = *op; + let right = Box::new(expr_normalize( + optimizer, + right, + remapped_columns, + optimizer_config, + )?); + Ok(Expr::BinaryExpr { left, op, right }) + } + + Expr::AnyExpr { + left, + op, + right, + all, + } => { + let left = Box::new(expr_normalize( + optimizer, + left, + remapped_columns, + optimizer_config, + )?); + let op = *op; + let right = Box::new(expr_normalize( + optimizer, + right, + remapped_columns, + optimizer_config, + )?); + let all = *all; + Ok(Expr::AnyExpr { + left, + op, + right, + all, + }) + } + + Expr::Like(Like { + negated, + expr, + pattern, + escape_char, + }) => { + let negated = *negated; + let expr = Box::new(expr_normalize( + optimizer, + expr, + remapped_columns, + optimizer_config, + )?); + let pattern = Box::new(expr_normalize( + optimizer, + pattern, + remapped_columns, + optimizer_config, + )?); + let escape_char = *escape_char; + Ok(Expr::Like(Like { + negated, + expr, + pattern, + escape_char, + })) + } + + Expr::ILike(Like { + negated, + expr, + pattern, + escape_char, + }) => { + let negated = *negated; + let expr = Box::new(expr_normalize( + optimizer, + expr, + remapped_columns, + optimizer_config, + )?); + let pattern = Box::new(expr_normalize( + optimizer, + pattern, + remapped_columns, + optimizer_config, + )?); + let escape_char = *escape_char; + Ok(Expr::ILike(Like { + negated, + expr, + pattern, + escape_char, + })) + } + + Expr::SimilarTo(Like { + negated, + expr, + pattern, + escape_char, + }) => { + let negated = *negated; + let expr = Box::new(expr_normalize( + optimizer, + expr, + remapped_columns, + optimizer_config, + )?); + let pattern = Box::new(expr_normalize( + optimizer, + pattern, + remapped_columns, + optimizer_config, + )?); + let escape_char = *escape_char; + Ok(Expr::SimilarTo(Like { + negated, + expr, + pattern, + escape_char, + })) + } + + Expr::Not(expr) => { + let expr = Box::new(expr_normalize( + optimizer, + expr, + remapped_columns, + optimizer_config, + )?); + Ok(Expr::Not(expr)) + } + + Expr::IsNotNull(expr) => { + let expr = Box::new(expr_normalize( + optimizer, + expr, + remapped_columns, + optimizer_config, + )?); + Ok(Expr::IsNotNull(expr)) + } + + Expr::IsNull(expr) => { + let expr = Box::new(expr_normalize( + optimizer, + expr, + remapped_columns, + optimizer_config, + )?); + Ok(Expr::IsNull(expr)) + } + + Expr::Negative(expr) => { + let expr = Box::new(expr_normalize( + optimizer, + expr, + remapped_columns, + optimizer_config, + )?); + Ok(Expr::Negative(expr)) + } + + Expr::GetIndexedField { expr, key } => { + let expr = Box::new(expr_normalize( + optimizer, + expr, + remapped_columns, + optimizer_config, + )?); + let key = Box::new(expr_normalize( + optimizer, + key, + remapped_columns, + optimizer_config, + )?); + Ok(Expr::GetIndexedField { expr, key }) + } + + Expr::Between { + expr, + negated, + low, + high, + } => { + let expr = Box::new(expr_normalize( + optimizer, + expr, + remapped_columns, + optimizer_config, + )?); + let negated = *negated; + let low = Box::new(expr_normalize( + optimizer, + low, + remapped_columns, + optimizer_config, + )?); + let high = Box::new(expr_normalize( + optimizer, + high, + remapped_columns, + optimizer_config, + )?); + Ok(Expr::Between { + expr, + negated, + low, + high, + }) + } + + Expr::Case { + expr, + when_then_expr, + else_expr, + } => { + let expr = expr + .as_ref() + .map(|e| { + Ok::<_, DataFusionError>(Box::new(expr_normalize( + optimizer, + e, + remapped_columns, + optimizer_config, + )?)) + }) + .transpose()?; + let when_then_expr = when_then_expr + .iter() + .map(|(when, then)| { + Ok(( + Box::new(expr_normalize( + optimizer, + when, + remapped_columns, + optimizer_config, + )?), + Box::new(expr_normalize( + optimizer, + then, + remapped_columns, + optimizer_config, + )?), + )) + }) + .collect::>>()?; + let else_expr = else_expr + .as_ref() + .map(|e| { + Ok::<_, DataFusionError>(Box::new(expr_normalize( + optimizer, + e, + remapped_columns, + optimizer_config, + )?)) + }) + .transpose()?; + Ok(Expr::Case { + expr, + when_then_expr, + else_expr, + }) + } + + Expr::Cast { expr, data_type } => { + let expr = Box::new(expr_normalize( + optimizer, + expr, + remapped_columns, + optimizer_config, + )?); + let data_type = data_type.clone(); + Ok(Expr::Cast { expr, data_type }) + } + + Expr::TryCast { expr, data_type } => { + let expr = Box::new(expr_normalize( + optimizer, + expr, + remapped_columns, + optimizer_config, + )?); + let data_type = data_type.clone(); + Ok(Expr::TryCast { expr, data_type }) + } + + Expr::Sort { + expr, + asc, + nulls_first, + } => { + let expr = Box::new(expr_normalize( + optimizer, + expr, + remapped_columns, + optimizer_config, + )?); + let asc = *asc; + let nulls_first = *nulls_first; + Ok(Expr::Sort { + expr, + asc, + nulls_first, + }) + } + + Expr::ScalarFunction { fun, args } => { + let (fun, args) = scalar_function_normalize( + optimizer, + fun, + args, + remapped_columns, + optimizer_config, + )?; + Ok(Expr::ScalarFunction { fun, args }) + } + + Expr::ScalarUDF { fun, args } => { + let fun = Arc::clone(fun); + let args = args + .iter() + .map(|arg| expr_normalize(optimizer, arg, remapped_columns, optimizer_config)) + .collect::>>()?; + Ok(Expr::ScalarUDF { fun, args }) + } + + Expr::TableUDF { fun, args } => { + let fun = Arc::clone(fun); + let args = args + .iter() + .map(|arg| expr_normalize(optimizer, arg, remapped_columns, optimizer_config)) + .collect::>>()?; + Ok(Expr::TableUDF { fun, args }) + } + + Expr::AggregateFunction { + fun, + args, + distinct, + within_group, + } => { + let fun = fun.clone(); + let args = args + .iter() + .map(|arg| expr_normalize(optimizer, arg, remapped_columns, optimizer_config)) + .collect::>>()?; + let distinct = *distinct; + let within_group = within_group + .as_ref() + .map(|expr| { + expr.iter() + .map(|e| expr_normalize(optimizer, e, remapped_columns, optimizer_config)) + .collect::>>() + }) + .transpose()?; + Ok(Expr::AggregateFunction { + fun, + args, + distinct, + within_group, + }) + } + + Expr::WindowFunction { + fun, + args, + partition_by, + order_by, + window_frame, + } => { + let fun = fun.clone(); + let args = args + .iter() + .map(|arg| expr_normalize(optimizer, arg, remapped_columns, optimizer_config)) + .collect::>>()?; + let partition_by = partition_by + .iter() + .map(|expr| expr_normalize(optimizer, expr, remapped_columns, optimizer_config)) + .collect::>>()?; + let order_by = order_by + .iter() + .map(|expr| expr_normalize(optimizer, expr, remapped_columns, optimizer_config)) + .collect::>>()?; + let window_frame = *window_frame; + Ok(Expr::WindowFunction { + fun, + args, + partition_by, + order_by, + window_frame, + }) + } + + Expr::AggregateUDF { fun, args } => { + let fun = Arc::clone(fun); + let args = args + .iter() + .map(|arg| expr_normalize(optimizer, arg, remapped_columns, optimizer_config)) + .collect::>>()?; + Ok(Expr::AggregateUDF { fun, args }) + } + + Expr::InList { + expr, + list, + negated, + } => { + let expr = Box::new(expr_normalize( + optimizer, + expr, + remapped_columns, + optimizer_config, + )?); + let list = list + .iter() + .map(|e| expr_normalize(optimizer, e, remapped_columns, optimizer_config)) + .collect::>>()?; + let negated = *negated; + Ok(Expr::InList { + expr, + list, + negated, + }) + } + + Expr::InSubquery { + expr, + subquery, + negated, + } => { + let expr = Box::new(expr_normalize( + optimizer, + expr, + remapped_columns, + optimizer_config, + )?); + let subquery = Box::new(expr_normalize( + optimizer, + subquery, + remapped_columns, + optimizer_config, + )?); + let negated = *negated; + Ok(Expr::InSubquery { + expr, + subquery, + negated, + }) + } + + e @ Expr::Wildcard => Ok(e.clone()), + + e @ Expr::QualifiedWildcard { .. } => Ok(e.clone()), + + Expr::GroupingSet(grouping_set) => { + let grouping_set = grouping_set_normalize( + optimizer, + grouping_set, + remapped_columns, + optimizer_config, + )?; + Ok(Expr::GroupingSet(grouping_set)) + } + } +} + +/// Normalizes columns, taking remapped columns into account. +fn column_normalize( + _optimizer: &PlanNormalize, + column: &Column, + remapped_columns: &HashMap, + _optimizer_config: &OptimizerConfig, +) -> Result { + if let Some(new_column) = remapped_columns.get(column) { + return Ok(new_column.clone()); + } + Ok(column.clone()) +} + +/// Recursively normalizes scalar functions. +/// Currently this includes replacing literal granularities in `DatePart` and `DateTrunc` +/// functions with their normalized (parsed or lowercase) equivalents. +fn scalar_function_normalize( + optimizer: &PlanNormalize, + fun: &BuiltinScalarFunction, + args: &[Expr], + remapped_columns: &HashMap, + optimizer_config: &OptimizerConfig, +) -> Result<(BuiltinScalarFunction, Vec)> { + let fun = fun.clone(); + let mut args = args + .iter() + .map(|arg| expr_normalize(optimizer, arg, remapped_columns, optimizer_config)) + .collect::>>()?; + + // If the function is `DatePart` or `DateTrunc` and the first argument is a literal string, + // normalize the granularity by parsing it and replacing with standartized granularity. + // If it cannot be parsed, simply convert it to lowercase. + if matches!( + fun, + BuiltinScalarFunction::DatePart | BuiltinScalarFunction::DateTrunc + ) && args.len() > 0 + { + if let Expr::Literal(ScalarValue::Utf8(Some(granularity))) = &mut args[0] { + if let Ok(parsed_granularity) = granularity.parse::() { + *granularity = parsed_granularity.as_str().to_string(); + } else { + *granularity = granularity.to_ascii_lowercase(); + } + } + } + + Ok((fun, args)) +} + +/// Recursively normalizes grouping sets. +fn grouping_set_normalize( + optimizer: &PlanNormalize, + grouping_set: &GroupingSet, + remapped_columns: &HashMap, + optimizer_config: &OptimizerConfig, +) -> Result { + match grouping_set { + GroupingSet::Rollup(exprs) => { + let exprs = exprs + .iter() + .map(|expr| expr_normalize(optimizer, expr, remapped_columns, optimizer_config)) + .collect::>>()?; + Ok(GroupingSet::Rollup(exprs)) + } + + GroupingSet::Cube(exprs) => { + let exprs = exprs + .iter() + .map(|expr| expr_normalize(optimizer, expr, remapped_columns, optimizer_config)) + .collect::>>()?; + Ok(GroupingSet::Cube(exprs)) + } + + GroupingSet::GroupingSets(exprs) => { + let exprs = exprs + .iter() + .map(|exprs| { + Ok(exprs + .iter() + .map(|expr| { + expr_normalize(optimizer, expr, remapped_columns, optimizer_config) + }) + .collect::>>()?) + }) + .collect::>>()?; + Ok(GroupingSet::GroupingSets(exprs)) + } + } +} diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index 7aa24fbdc107e..fd4fb2c54dc10 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -13879,8 +13879,8 @@ ORDER BY "source"."str0" ASC let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; - // check if contains `CAST(EXTRACT(YEAR FROM ..) || .. || .. || ..)` - let re = Regex::new(r"CAST.+EXTRACT.+YEAR FROM(.+ \|\|){3}").unwrap(); + // check if contains `CAST(EXTRACT(year FROM ..) || .. || .. || ..)` + let re = Regex::new(r"CAST.+EXTRACT.+year FROM(.+ \|\|){3}").unwrap(); assert!(re.is_match(&sql)); // check if contains `LOWER(..) = .. AND LOWER(..) = ..` let re = Regex::new(r"LOWER ?\(.+\) = .+ AND .+LOWER ?\(.+\) = .+").unwrap(); @@ -14031,8 +14031,8 @@ ORDER BY "source"."str0" ASC if Rewriter::sql_push_down_enabled() { let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; - assert!(sql.contains("EXTRACT(YEAR")); - assert!(sql.contains("EXTRACT(MONTH")); + assert!(sql.contains("EXTRACT(year")); + assert!(sql.contains("EXTRACT(month")); let physical_plan = query_plan.as_physical_plan().await.unwrap(); println!( @@ -15114,7 +15114,7 @@ ORDER BY "source"."str0" ASC .find_cube_scan_wrapped_sql() .wrapped_sql .sql - .contains("EXTRACT(MONTH FROM ")); + .contains("EXTRACT(month FROM ")); } #[tokio::test] @@ -15155,7 +15155,7 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; assert!(sql.contains("order_date")); - assert!(sql.contains("EXTRACT(DAY FROM")) + assert!(sql.contains("EXTRACT(day FROM")) } #[tokio::test] @@ -16522,7 +16522,7 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), let logical_plan = query_plan.as_logical_plan(); let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; - assert!(sql.contains("EXTRACT(EPOCH")); + assert!(sql.contains("EXTRACT(epoch")); // Databricks let query_plan = convert_select_to_query_plan_customized( diff --git a/rust/cubesql/cubesql/src/compile/query_engine.rs b/rust/cubesql/cubesql/src/compile/query_engine.rs index 5c43baeebb1a7..2b5e442f358bb 100644 --- a/rust/cubesql/cubesql/src/compile/query_engine.rs +++ b/rust/cubesql/cubesql/src/compile/query_engine.rs @@ -1,4 +1,3 @@ -use crate::compile::engine::df::planner::CubeQueryPlanner; use std::{ backtrace::Backtrace, collections::HashMap, future::Future, pin::Pin, sync::Arc, time::SystemTime, @@ -8,7 +7,10 @@ use crate::{ compile::{ engine::{ df::{ - optimizers::{FilterPushDown, FilterSplitMeta, LimitPushDown, SortPushDown}, + optimizers::{ + FilterPushDown, FilterSplitMeta, LimitPushDown, PlanNormalize, SortPushDown, + }, + planner::CubeQueryPlanner, scan::CubeScanNode, wrapper::{CubeScanWrappedSqlNode, CubeScanWrapperNode}, }, @@ -138,6 +140,7 @@ pub trait QueryEngine { let optimizer_config = OptimizerConfig::new(); let optimizers: Vec> = vec![ + Arc::new(PlanNormalize::new()), Arc::new(ProjectionDropOut::new()), Arc::new(FilterPushDown::new()), Arc::new(SortPushDown::new()), diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/dates.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/dates.rs index b97ecb321c9c8..b0b7702be2ce5 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/dates.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/dates.rs @@ -271,7 +271,7 @@ impl RewriteRules for DateRules { binary_expr( negative_expr(self.fun_expr( "DatePart", - vec![literal_string("DOW"), column_expr("?column")], + vec![literal_string("dow"), column_expr("?column")], )), "*", // TODO match diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs index 1932938040e61..c34cc60e43762 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs @@ -1671,34 +1671,6 @@ impl RewriteRules for FilterRules { // DATE_PART('year', "KibanaSampleDataEcommerce"."order_date") = 2019 transforming_rewrite( "extract-year-equals", - filter_replacer( - binary_expr( - self.fun_expr( - "DatePart", - vec![literal_string("YEAR"), column_expr("?column")], - ), - "=", - literal_expr("?year"), - ), - "?alias_to_cube", - "?members", - "?filter_aliases", - ), - filter_member("?member", "FilterMemberOp:inDateRange", "?values"), - self.transform_filter_extract_year_equals( - "?year", - "?column", - "?alias_to_cube", - "?members", - "?member", - "?values", - "?filter_aliases", - ), - ), - // Same as the rule above, but it uses different case for granularity. - // TODO: Remove, whenever we will fix bug with granularity cases. CORE-1761 - transforming_rewrite( - "extract-year-equals-lower-case", filter_replacer( binary_expr( self.fun_expr( @@ -1732,7 +1704,7 @@ impl RewriteRules for FilterRules { "Trunc", vec![self.fun_expr( "DatePart", - vec![literal_string("YEAR"), column_expr("?column")], + vec![literal_string("year"), column_expr("?column")], )], ), "=", diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/old_split.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/old_split.rs index 79b5fe0e5c23a..32a764e735949 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/old_split.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/old_split.rs @@ -866,7 +866,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), @@ -887,7 +887,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), column_expr("?column"), ], ), @@ -898,7 +898,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), @@ -952,7 +952,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), @@ -973,7 +973,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), column_expr("?column"), ], ), @@ -984,7 +984,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), @@ -1087,7 +1087,7 @@ impl RewriteRules for OldSplitRules { binary_expr( self.fun_expr( "DatePart", - vec![literal_string("MONTH"), column_expr("?column")], + vec![literal_string("month"), column_expr("?column")], ), "-", literal_int(1), @@ -1102,7 +1102,7 @@ impl RewriteRules for OldSplitRules { alias_expr( self.fun_expr( "DateTrunc", - vec![literal_string("MONTH"), column_expr("?column")], + vec![literal_string("month"), column_expr("?column")], ), "?outer_alias", ), @@ -1118,7 +1118,7 @@ impl RewriteRules for OldSplitRules { binary_expr( self.fun_expr( "DatePart", - vec![literal_string("MONTH"), column_expr("?column")], + vec![literal_string("month"), column_expr("?column")], ), "-", literal_int(1), @@ -1136,7 +1136,7 @@ impl RewriteRules for OldSplitRules { binary_expr( self.fun_expr( "DatePart", - vec![literal_string("MONTH"), column_expr("?outer_column")], + vec![literal_string("month"), column_expr("?outer_column")], ), "-", literal_int(1), @@ -1166,7 +1166,7 @@ impl RewriteRules for OldSplitRules { cast_expr_explicit( self.fun_expr( "DatePart", - vec![literal_string("YEAR"), column_expr("?column")], + vec![literal_string("year"), column_expr("?column")], ), ArrowDataType::Utf8, ), @@ -1180,7 +1180,7 @@ impl RewriteRules for OldSplitRules { cast_expr_explicit( self.fun_expr( "DatePart", - vec![literal_string("MONTH"), column_expr("?column")], + vec![literal_string("month"), column_expr("?column")], ), ArrowDataType::Utf8, ), @@ -1209,7 +1209,7 @@ impl RewriteRules for OldSplitRules { cast_expr_explicit( self.fun_expr( "DatePart", - vec![literal_string("YEAR"), column_expr("?column")], + vec![literal_string("year"), column_expr("?column")], ), ArrowDataType::Utf8, ), @@ -1223,7 +1223,7 @@ impl RewriteRules for OldSplitRules { cast_expr_explicit( self.fun_expr( "DatePart", - vec![literal_string("MONTH"), column_expr("?column")], + vec![literal_string("month"), column_expr("?column")], ), ArrowDataType::Utf8, ), @@ -1256,7 +1256,7 @@ impl RewriteRules for OldSplitRules { cast_expr_explicit( self.fun_expr( "DatePart", - vec![literal_string("YEAR"), column_expr("?column")], + vec![literal_string("year"), column_expr("?column")], ), ArrowDataType::Utf8, ), @@ -1270,7 +1270,7 @@ impl RewriteRules for OldSplitRules { cast_expr_explicit( self.fun_expr( "DatePart", - vec![literal_string("MONTH"), column_expr("?column")], + vec![literal_string("month"), column_expr("?column")], ), ArrowDataType::Utf8, ), @@ -1304,7 +1304,7 @@ impl RewriteRules for OldSplitRules { cast_expr_explicit( self.fun_expr( "DatePart", - vec![literal_string("YEAR"), column_expr("?column")], + vec![literal_string("year"), column_expr("?column")], ), ArrowDataType::Utf8, ), @@ -1315,7 +1315,7 @@ impl RewriteRules for OldSplitRules { cast_expr_explicit( self.fun_expr( "DatePart", - vec![literal_string("QUARTER"), column_expr("?column")], + vec![literal_string("quarter"), column_expr("?column")], ), ArrowDataType::Utf8, ), @@ -1340,7 +1340,7 @@ impl RewriteRules for OldSplitRules { cast_expr_explicit( self.fun_expr( "DatePart", - vec![literal_string("YEAR"), column_expr("?column")], + vec![literal_string("year"), column_expr("?column")], ), ArrowDataType::Utf8, ), @@ -1351,7 +1351,7 @@ impl RewriteRules for OldSplitRules { cast_expr_explicit( self.fun_expr( "DatePart", - vec![literal_string("QUARTER"), column_expr("?column")], + vec![literal_string("quarter"), column_expr("?column")], ), ArrowDataType::Utf8, ), @@ -1380,7 +1380,7 @@ impl RewriteRules for OldSplitRules { cast_expr_explicit( self.fun_expr( "DatePart", - vec![literal_string("YEAR"), column_expr("?column")], + vec![literal_string("year"), column_expr("?column")], ), ArrowDataType::Utf8, ), @@ -1391,7 +1391,7 @@ impl RewriteRules for OldSplitRules { cast_expr_explicit( self.fun_expr( "DatePart", - vec![literal_string("QUARTER"), column_expr("?column")], + vec![literal_string("quarter"), column_expr("?column")], ), ArrowDataType::Utf8, ), @@ -2279,7 +2279,7 @@ impl RewriteRules for OldSplitRules { outer_aggregate_split_replacer( self.fun_expr( "DatePart", - vec![literal_string("YEAR"), column_expr("?column")], + vec![literal_string("year"), column_expr("?column")], ), "?cube", ), @@ -2288,7 +2288,7 @@ impl RewriteRules for OldSplitRules { outer_aggregate_split_replacer( self.fun_expr( "DatePart", - vec![literal_string("YEAR"), column_expr("?column")], + vec![literal_string("year"), column_expr("?column")], ), "?cube", ), @@ -2301,7 +2301,7 @@ impl RewriteRules for OldSplitRules { outer_aggregate_split_replacer( self.fun_expr( "DatePart", - vec![literal_string("MONTH"), column_expr("?column")], + vec![literal_string("month"), column_expr("?column")], ), "?cube", ), @@ -2310,7 +2310,7 @@ impl RewriteRules for OldSplitRules { outer_aggregate_split_replacer( self.fun_expr( "DatePart", - vec![literal_string("MONTH"), column_expr("?column")], + vec![literal_string("month"), column_expr("?column")], ), "?cube", ), @@ -2769,7 +2769,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), "?column".to_string(), ], ), @@ -2821,7 +2821,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), "?column".to_string(), ], ), @@ -2871,7 +2871,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), "?column".to_string(), ], ), @@ -2925,7 +2925,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), @@ -2955,7 +2955,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), column_expr("?column"), ], ), @@ -2966,7 +2966,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), @@ -3022,7 +3022,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), @@ -3052,7 +3052,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), column_expr("?column"), ], ), @@ -3063,7 +3063,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), @@ -3126,7 +3126,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), column_expr("?column"), ], ), @@ -3170,7 +3170,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), column_expr("?column"), ], ), @@ -3218,7 +3218,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), column_expr("?column"), ], ), @@ -3265,7 +3265,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), column_expr("?column"), ], ), @@ -3302,7 +3302,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), column_expr("?column"), ], ), @@ -3343,7 +3343,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), column_expr("?column"), ], ), @@ -3385,7 +3385,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), column_expr("?column"), ], ), @@ -3403,7 +3403,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), @@ -3455,7 +3455,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), column_expr("?column"), ], ), @@ -3473,7 +3473,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), @@ -3529,7 +3529,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), column_expr("?column"), ], ), @@ -3547,7 +3547,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), @@ -3612,7 +3612,7 @@ impl RewriteRules for OldSplitRules { binary_expr( self.fun_expr( "DatePart", - vec![literal_string("YEAR"), column_expr("?column")], + vec![literal_string("year"), column_expr("?column")], ), "||", literal_string("-"), @@ -3620,7 +3620,7 @@ impl RewriteRules for OldSplitRules { "||", self.fun_expr( "DatePart", - vec![literal_string("MONTH"), column_expr("?column")], + vec![literal_string("month"), column_expr("?column")], ), ), "||", @@ -3634,7 +3634,7 @@ impl RewriteRules for OldSplitRules { binary_expr( self.fun_expr( "DatePart", - vec![literal_string("MONTH"), column_expr("?column")], + vec![literal_string("month"), column_expr("?column")], ), "-", literal_int(1), @@ -3687,7 +3687,7 @@ impl RewriteRules for OldSplitRules { binary_expr( self.fun_expr( "DatePart", - vec![literal_string("YEAR"), column_expr("?column")], + vec![literal_string("year"), column_expr("?column")], ), "||", literal_string("-"), @@ -3695,7 +3695,7 @@ impl RewriteRules for OldSplitRules { "||", self.fun_expr( "DatePart", - vec![literal_string("MONTH"), column_expr("?column")], + vec![literal_string("month"), column_expr("?column")], ), ), "||", @@ -3709,7 +3709,7 @@ impl RewriteRules for OldSplitRules { binary_expr( self.fun_expr( "DatePart", - vec![literal_string("MONTH"), column_expr("?column")], + vec![literal_string("month"), column_expr("?column")], ), "-", literal_int(1), @@ -3777,7 +3777,7 @@ impl RewriteRules for OldSplitRules { binary_expr( self.fun_expr( "DatePart", - vec![literal_string("YEAR"), column_expr("?column")], + vec![literal_string("year"), column_expr("?column")], ), "||", literal_string("-"), @@ -3785,7 +3785,7 @@ impl RewriteRules for OldSplitRules { "||", self.fun_expr( "DatePart", - vec![literal_string("MONTH"), column_expr("?column")], + vec![literal_string("month"), column_expr("?column")], ), ), "||", @@ -3804,7 +3804,7 @@ impl RewriteRules for OldSplitRules { binary_expr( self.fun_expr( "DatePart", - vec![literal_string("MONTH"), column_expr("?column")], + vec![literal_string("month"), column_expr("?column")], ), "-", literal_int(1), @@ -3871,7 +3871,7 @@ impl RewriteRules for OldSplitRules { binary_expr( self.fun_expr( "DatePart", - vec![literal_string("YEAR"), column_expr("?column")], + vec![literal_string("year"), column_expr("?column")], ), "||", literal_string("-"), @@ -3879,7 +3879,7 @@ impl RewriteRules for OldSplitRules { "||", self.fun_expr( "DatePart", - vec![literal_string("MONTH"), column_expr("?column")], + vec![literal_string("month"), column_expr("?column")], ), ), "||", @@ -3898,7 +3898,7 @@ impl RewriteRules for OldSplitRules { binary_expr( self.fun_expr( "DatePart", - vec![literal_string("MONTH"), column_expr("?column")], + vec![literal_string("month"), column_expr("?column")], ), "-", literal_int(1), @@ -4175,7 +4175,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("DAY"), + literal_string("day"), udf_expr( "dateadd", vec![ @@ -4259,7 +4259,7 @@ impl RewriteRules for OldSplitRules { self.fun_expr( "DatePart", vec![ - literal_string("DAY"), + literal_string("day"), udf_expr( "dateadd", vec![ diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/split/granularity.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/split/granularity.rs index 805e13b73b86c..b0155afb3499d 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/split/granularity.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/split/granularity.rs @@ -26,7 +26,7 @@ impl SplitRules { binary_expr( self.fun_expr( "DatePart", - vec![literal_string("YEAR"), column_expr("?column")], + vec![literal_string("year"), column_expr("?column")], ), "*", literal_int(100), @@ -75,7 +75,7 @@ impl SplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), "?column".to_string(), ], ), @@ -120,7 +120,7 @@ impl SplitRules { self.fun_expr( "DatePart", vec![ - literal_string("DAY"), + literal_string("day"), udf_expr( "dateadd", vec![ @@ -223,7 +223,7 @@ impl SplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), column_expr("?column"), ], ), @@ -234,7 +234,7 @@ impl SplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), @@ -256,7 +256,7 @@ impl SplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), @@ -329,7 +329,7 @@ impl SplitRules { binary_expr( self.fun_expr( "DatePart", - vec![literal_string("YEAR"), column_expr("?column")], + vec![literal_string("year"), column_expr("?column")], ), "||", literal_string("-"), @@ -344,7 +344,7 @@ impl SplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), @@ -401,7 +401,7 @@ impl SplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), @@ -422,7 +422,7 @@ impl SplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), column_expr("?column"), ], ), @@ -433,7 +433,7 @@ impl SplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), @@ -491,7 +491,7 @@ impl SplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), @@ -521,7 +521,7 @@ impl SplitRules { self.fun_expr( "DatePart", vec![ - literal_string("YEAR"), + literal_string("year"), column_expr("?column"), ], ), @@ -532,7 +532,7 @@ impl SplitRules { self.fun_expr( "DatePart", vec![ - literal_string("MONTH"), + literal_string("month"), column_expr("?column"), ], ), diff --git a/rust/cubesql/cubesql/src/compile/router.rs b/rust/cubesql/cubesql/src/compile/router.rs index 9e013c202dbaf..9441a99b0b684 100644 --- a/rust/cubesql/cubesql/src/compile/router.rs +++ b/rust/cubesql/cubesql/src/compile/router.rs @@ -15,9 +15,8 @@ use crate::{ auth_service::SqlAuthServiceAuthenticateRequest, dataframe, statement::{ - ApproximateCountDistinctVisitor, CastReplacer, DateTokenNormalizeReplacer, - RedshiftDatePartReplacer, SensitiveDataSanitizer, ToTimestampReplacer, - UdfWildcardArgReplacer, + ApproximateCountDistinctVisitor, CastReplacer, RedshiftDatePartReplacer, + SensitiveDataSanitizer, ToTimestampReplacer, UdfWildcardArgReplacer, }, ColumnFlags, ColumnType, Session, SessionManager, SessionState, }, @@ -621,7 +620,6 @@ pub fn rewrite_statement(stmt: ast::Statement) -> ast::Statement { let stmt = CastReplacer::new().replace(stmt); let stmt = ToTimestampReplacer::new().replace(stmt); let stmt = UdfWildcardArgReplacer::new().replace(stmt); - let stmt = DateTokenNormalizeReplacer::new().replace(stmt); let stmt = RedshiftDatePartReplacer::new().replace(stmt); let stmt = ApproximateCountDistinctVisitor::new().replace(stmt); diff --git a/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__test_extract_string_field.snap b/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__test_extract_string_field.snap index 1f0526cb3be1a..22f6a8eeef6e6 100644 --- a/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__test_extract_string_field.snap +++ b/rust/cubesql/cubesql/src/compile/snapshots/cubesql__compile__tests__test_extract_string_field.snap @@ -1,9 +1,9 @@ --- source: cubesql/src/compile/mod.rs -expression: "execute_query(\"SELECT EXTRACT('YEAR' FROM CAST ('2020-12-25 22:48:48.000' AS timestamptz))\".to_string(),\n DatabaseProtocol::PostgreSQL).await?" +expression: "execute_query(\"SELECT EXTRACT('YEAR' FROM CAST ('2020-12-25 22:48:48.000' AS timestamptz))\".to_string(),\nDatabaseProtocol::PostgreSQL).await?" --- +---------------------------------------------------------------------------------------------+ -| datepart(Utf8("YEAR"),CAST(Utf8("2020-12-25 22:48:48.000") AS Timestamp(Nanosecond, None))) | +| datepart(Utf8("year"),CAST(Utf8("2020-12-25 22:48:48.000") AS Timestamp(Nanosecond, None))) | +---------------------------------------------------------------------------------------------+ | 2020 | +---------------------------------------------------------------------------------------------+ diff --git a/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs b/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs index 5db62bc5d772f..ba029cc9299c7 100644 --- a/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs +++ b/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs @@ -1876,7 +1876,7 @@ GROUP BY "replaceAggregationType": null, "addFilters": [{ "cubeParams": ["MultiTypeCube"], - "sql": "(((CAST(TRUNC(EXTRACT(YEAR FROM ${MultiTypeCube.dim_date0})) AS INTEGER) = 2024) AND (CAST(TRUNC(EXTRACT(MONTH FROM ${MultiTypeCube.dim_date0})) AS INTEGER) <= 11)) = TRUE)" + "sql": "(((CAST(TRUNC(EXTRACT(year FROM ${MultiTypeCube.dim_date0})) AS INTEGER) = 2024) AND (CAST(TRUNC(EXTRACT(month FROM ${MultiTypeCube.dim_date0})) AS INTEGER) <= 11)) = TRUE)" }], }, "groupingSet": null, diff --git a/rust/cubesql/cubesql/src/sql/statement.rs b/rust/cubesql/cubesql/src/sql/statement.rs index 221581aae9fd7..77ef0743a4827 100644 --- a/rust/cubesql/cubesql/src/sql/statement.rs +++ b/rust/cubesql/cubesql/src/sql/statement.rs @@ -1,4 +1,3 @@ -use crate::{compile::rewrite::rules::utils::DatePartToken, sql::shim::ConnectionError}; use itertools::Itertools; use log::trace; use pg_srv::{ @@ -12,6 +11,7 @@ use sqlparser::ast::{ use std::{collections::HashMap, error::Error}; use super::types::ColumnType; +use crate::sql::shim::ConnectionError; enum PlaceholderType { String, @@ -804,70 +804,6 @@ impl<'ast> Visitor<'ast, ConnectionError> for CastReplacer { } } -// This approach is limited to literals-in-query, but it's better than nothing -// It would be simpler to do in rewrite rules, by relying on constant folding, but would require cumbersome top-down extraction -// TODO remove this if/when DF starts supporting all of PostgreSQL aliases -#[derive(Debug)] -pub struct DateTokenNormalizeReplacer {} - -impl DateTokenNormalizeReplacer { - pub fn new() -> Self { - Self {} - } - - pub fn replace(mut self, stmt: ast::Statement) -> ast::Statement { - let mut result = stmt; - - self.visit_statement(&mut result).unwrap(); - - result - } -} - -impl<'ast> Visitor<'ast, ConnectionError> for DateTokenNormalizeReplacer { - // TODO support EXTRACT normalization after support in sqlparser - fn visit_function(&mut self, fun: &mut Function) -> Result<(), ConnectionError> { - for res in fun.name.0.iter_mut() { - self.visit_identifier(res)?; - } - - let fn_name = fun.name.to_string().to_lowercase(); - match (fn_name.as_str(), fun.args.len()) { - ("date_trunc", 2) | ("date_part", 2) => { - match &mut fun.args[0] { - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( - Value::SingleQuotedString(token), - ))) => { - if let Ok(parsed) = token.parse::() { - *token = parsed.as_str().to_string(); - } else { - // Do nothing - }; - } - _ => { - // Do nothing - } - } - } - _ => { - // Do nothing - } - } - - self.visit_function_args(&mut fun.args)?; - if let Some(over) = &mut fun.over { - for res in over.partition_by.iter_mut() { - self.visit_expr(res)?; - } - for order_expr in over.order_by.iter_mut() { - self.visit_expr(&mut order_expr.expr)?; - } - } - - Ok(()) - } -} - #[derive(Debug)] pub struct RedshiftDatePartReplacer {}