From abb2e0acd245e15dc36138b4856f833dc888ddb1 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Mon, 7 Apr 2025 11:47:32 +0300 Subject: [PATCH 1/6] fix(schema-compiler): Fix BigQuery DATE_ADD push down template for years/quarters/months --- .../src/adapter/BigqueryQuery.ts | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts b/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts index 37e74c9159594..c431e2cf05d20 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts @@ -186,11 +186,21 @@ export class BigqueryQuery extends BaseQuery { } public subtractTimestampInterval(date, interval) { - return `TIMESTAMP_SUB(${date}, INTERVAL ${this.formatInterval(interval)[0]})`; + const [intervalFormatted, timeUnit] = this.formatInterval(interval); + if (['YEAR', 'MONTH', 'QUARTER'].includes(timeUnit)) { + return this.timeStampCast(`DATETIME_SUB(DATETIME(${date}), INTERVAL ${intervalFormatted})`); + } + + return `TIMESTAMP_SUB(${date}, INTERVAL ${intervalFormatted})`; } public addTimestampInterval(date, interval) { - return `TIMESTAMP_ADD(${date}, INTERVAL ${this.formatInterval(interval)[0]})`; + const [intervalFormatted, timeUnit] = this.formatInterval(interval); + if (['YEAR', 'MONTH', 'QUARTER'].includes(timeUnit)) { + return this.timeStampCast(`DATETIME_ADD(DATETIME(${date}), INTERVAL ${intervalFormatted})`); + } + + return `TIMESTAMP_ADD(${date}, INTERVAL ${intervalFormatted})`; } public nowTimestampSql() { @@ -242,7 +252,7 @@ export class BigqueryQuery extends BaseQuery { templates.functions.STRPOS = 'STRPOS({{ args_concat }})'; templates.functions.DATEDIFF = 'DATETIME_DIFF(CAST({{ args[2] }} AS DATETIME), CAST({{ args[1] }} AS DATETIME), {{ date_part }})'; // DATEADD is being rewritten to DATE_ADD - // templates.functions.DATEADD = 'DATETIME_ADD(CAST({{ args[2] }} AS DATETTIME), INTERVAL {{ interval }} {{ date_part }})'; + templates.functions.DATE_ADD = '{% if date_part|upper in [\'YEAR\', \'MONTH\', \'QUARTER\'] %}TIMESTAMP(DATETIME_ADD(DATETIME({{ args[2] }}), INTERVAL {{ interval }} {{ date_part }})){% else %}TIMESTAMP_ADD({{ args[2] }}, INTERVAL {{ interval }} {{ date_part }}){% endif %}'; templates.functions.CURRENTDATE = 'CURRENT_DATE'; delete templates.functions.TO_CHAR; templates.expressions.binary = '{% if op == \'%\' %}MOD({{ left }}, {{ right }}){% else %}({{ left }} {{ op }} {{ right }}){% endif %}'; From 3bd8b55c94bfef73ed95ec1c14d3be4bb9e7201d Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Mon, 7 Apr 2025 12:49:03 +0300 Subject: [PATCH 2/6] add tests for BQ date_add --- rust/cubesql/cubesql/src/compile/mod.rs | 35 +++++++++++++++++++++---- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index 0c49244ce6175..e09ff8bc7b88e 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -14659,6 +14659,7 @@ ORDER BY "source"."str0" ASC .contains("DATEADD(day, 7,")); // BigQuery + let bq_templates = vec![("functions/DATE_ADD".to_string(), "{% if date_part|upper in ['YEAR', 'MONTH', 'QUARTER'] %}TIMESTAMP(DATETIME_ADD(DATETIME({{ args[2] }}), INTERVAL {{ interval }} {{ date_part }})){% else %}TIMESTAMP_ADD({{ args[2] }}, INTERVAL {{ interval }} {{ date_part }}){% endif %}".to_string())]; let query_plan = convert_select_to_query_plan_customized( " SELECT DATEADD(DAY, 7, order_date) AS d @@ -14668,9 +14669,7 @@ ORDER BY "source"."str0" ASC " .to_string(), DatabaseProtocol::PostgreSQL, - vec![ - ("functions/DATEADD".to_string(), "DATETIME_ADD(CAST({{ args[2] }} AS DATETTIME), INTERVAL {{ interval }} {{ date_part }})".to_string()), - ], + bq_templates.clone(), ) .await; @@ -14682,9 +14681,35 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; - assert!(sql.contains("DATETIME_ADD(CAST(")); + println!("{}", sql); + assert!(sql.contains("TIMESTAMP_ADD(")); assert!(sql.contains("INTERVAL 7 day)")); + let query_plan = convert_select_to_query_plan_customized( + " + SELECT DATEADD(MONTH, 7, order_date) AS d + FROM KibanaSampleDataEcommerce AS k + GROUP BY 1 + ORDER BY 1 DESC + " + .to_string(), + DatabaseProtocol::PostgreSQL, + bq_templates, + ) + .await; + + let physical_plan = query_plan.as_physical_plan().await.unwrap(); + println!( + "Physical plan: {}", + displayable(physical_plan.as_ref()).indent() + ); + + let logical_plan = query_plan.as_logical_plan(); + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; + println!("{}", sql); + assert!(sql.contains("TIMESTAMP(DATETIME_ADD(DATETIME(")); + assert!(sql.contains("INTERVAL 7 MONTH)")); + // Postgres let query_plan = convert_select_to_query_plan_customized( " @@ -14696,7 +14721,7 @@ ORDER BY "source"."str0" ASC .to_string(), DatabaseProtocol::PostgreSQL, vec![( - "functions/DATEADD".to_string(), + "functions/DATE_ADD".to_string(), "({{ args[2] }} + \'{{ interval }} {{ date_part }}\'::interval)".to_string(), )], ) From a821c81370a1691270fd1c5190af26179f0b1382 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Mon, 7 Apr 2025 22:25:31 +0300 Subject: [PATCH 3/6] fix tests --- rust/cubesql/cubesql/src/compile/mod.rs | 31 +++---------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index e09ff8bc7b88e..aee3908f2fb8f 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -14633,36 +14633,11 @@ ORDER BY "source"."str0" ASC } init_testing_logger(); - let query_plan = convert_select_to_query_plan( - " - SELECT DATEADD(DAY, 7, order_date) AS d - FROM KibanaSampleDataEcommerce AS k - GROUP BY 1 - ORDER BY 1 DESC - " - .to_string(), - DatabaseProtocol::PostgreSQL, - ) - .await; - - let physical_plan = query_plan.as_physical_plan().await.unwrap(); - println!( - "Physical plan: {}", - displayable(physical_plan.as_ref()).indent() - ); - - let logical_plan = query_plan.as_logical_plan(); - assert!(logical_plan - .find_cube_scan_wrapped_sql() - .wrapped_sql - .sql - .contains("DATEADD(day, 7,")); - // BigQuery let bq_templates = vec![("functions/DATE_ADD".to_string(), "{% if date_part|upper in ['YEAR', 'MONTH', 'QUARTER'] %}TIMESTAMP(DATETIME_ADD(DATETIME({{ args[2] }}), INTERVAL {{ interval }} {{ date_part }})){% else %}TIMESTAMP_ADD({{ args[2] }}, INTERVAL {{ interval }} {{ date_part }}){% endif %}".to_string())]; let query_plan = convert_select_to_query_plan_customized( " - SELECT DATEADD(DAY, 7, order_date) AS d + SELECT DATE_ADD(DAY, 7, order_date) AS d FROM KibanaSampleDataEcommerce AS k GROUP BY 1 ORDER BY 1 DESC @@ -14687,7 +14662,7 @@ ORDER BY "source"."str0" ASC let query_plan = convert_select_to_query_plan_customized( " - SELECT DATEADD(MONTH, 7, order_date) AS d + SELECT DATE_ADD(MONTH, 7, order_date) AS d FROM KibanaSampleDataEcommerce AS k GROUP BY 1 ORDER BY 1 DESC @@ -14713,7 +14688,7 @@ ORDER BY "source"."str0" ASC // Postgres let query_plan = convert_select_to_query_plan_customized( " - SELECT DATEADD(DAY, 7, order_date) AS d + SELECT DATE_ADD(DAY, 7, order_date) AS d FROM KibanaSampleDataEcommerce AS k GROUP BY 1 ORDER BY 1 DESC From 30d408ce289b536ae56c2a0c77a9b9a8d6e29e07 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Tue, 8 Apr 2025 14:04:55 +0300 Subject: [PATCH 4/6] add interval + date_part for date_add udf --- .../cubesql/src/compile/engine/df/wrapper.rs | 78 ++++++++++++++++++- 1 file changed, 76 insertions(+), 2 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs b/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs index bcc7b3d0c3808..c8af1cad5ef84 100644 --- a/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs +++ b/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs @@ -2491,10 +2491,47 @@ impl CubeScanWrapperNode { if DATE_PART_REGEX.is_match(date_part) { Ok(Some(date_part.to_string())) } else { - Err(date_part_err(date_part)) + Err(date_part_err(date_part.to_string())) } } - _ => Err(date_part_err(&args[0].to_string())), + _ => Err(date_part_err(args[0].to_string())), + }, + "date_add" => match &args[1] { + Expr::Literal(ScalarValue::IntervalDayTime(Some(interval))) => { + let days = (*interval >> 32) as i32; + let ms = (*interval & 0xFFFF_FFFF) as i32; + + if days != 0 && ms == 0 { + Ok(Some("DAY".to_string())) + } else if ms != 0 && days == 0 { + Ok(Some("MILLISECOND".to_string())) + } else { + Err(DataFusionError::Internal(format!( + "Unsupported mixed IntervalDayTime: days = {days}, ms = {ms}" + ))) + } + } + Expr::Literal(ScalarValue::IntervalYearMonth(Some(_months))) => { + Ok(Some("MONTH".to_string())) + } + Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(interval))) => { + let months = (interval >> 96) as i32; + let days = ((interval >> 64) & 0xFFFF_FFFF) as i32; + let nanos = *interval as i64; + + if months != 0 && days == 0 && nanos == 0 { + Ok(Some("MONTH".to_string())) + } else if days != 0 && months == 0 && nanos == 0 { + Ok(Some("DAY".to_string())) + } else if nanos != 0 && months == 0 && days == 0 { + Ok(Some("NANOSECOND".to_string())) + } else { + Err(DataFusionError::Internal(format!( + "Unsupported mixed IntervalMonthDayNano: months = {months}, days = {days}, nanos = {nanos}" + ))) + } + } + _ => Err(date_part_err(args[1].to_string())), }, _ => Ok(None), }?; @@ -2507,6 +2544,43 @@ impl CubeScanWrapperNode { "Can't generate SQL for scalar function: interval must be Int64" ))), }, + "date_add" => match &args[1] { + Expr::Literal(ScalarValue::IntervalDayTime(Some(interval))) => { + let days = (*interval >> 32) as i32; + let ms = (*interval & 0xFFFF_FFFF) as i32; + + if days != 0 && ms == 0 { + Ok(Some(days.to_string())) + } else if ms != 0 && days == 0 { + Ok(Some(ms.to_string())) + } else { + Err(DataFusionError::Internal(format!( + "Unsupported mixed IntervalDayTime: days = {days}, ms = {ms}" + ))) + } + } + Expr::Literal(ScalarValue::IntervalYearMonth(Some(months))) => { + Ok(Some(months.to_string())) + } + Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(interval))) => { + let months = (interval >> 96) as i32; + let days = ((interval >> 64) & 0xFFFF_FFFF) as i32; + let nanos = *interval as i64; + + if months != 0 && days == 0 && nanos == 0 { + Ok(Some(months.to_string())) + } else if days != 0 && months == 0 && nanos == 0 { + Ok(Some(days.to_string())) + } else if nanos != 0 && months == 0 && days == 0 { + Ok(Some(nanos.to_string())) + } else { + Err(DataFusionError::Internal(format!( + "Unsupported mixed IntervalMonthDayNano: months = {months}, days = {days}, nanos = {nanos}" + ))) + } + } + _ => Err(date_part_err(args[1].to_string())), + }, _ => Ok(None), }?; let mut sql_args = Vec::new(); From 2466eec7f4acb974f06b42006d857baa62766d06 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Tue, 8 Apr 2025 14:05:02 +0300 Subject: [PATCH 5/6] fix template --- packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts b/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts index c431e2cf05d20..db7e0e4056f2c 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts @@ -252,7 +252,7 @@ export class BigqueryQuery extends BaseQuery { templates.functions.STRPOS = 'STRPOS({{ args_concat }})'; templates.functions.DATEDIFF = 'DATETIME_DIFF(CAST({{ args[2] }} AS DATETIME), CAST({{ args[1] }} AS DATETIME), {{ date_part }})'; // DATEADD is being rewritten to DATE_ADD - templates.functions.DATE_ADD = '{% if date_part|upper in [\'YEAR\', \'MONTH\', \'QUARTER\'] %}TIMESTAMP(DATETIME_ADD(DATETIME({{ args[2] }}), INTERVAL {{ interval }} {{ date_part }})){% else %}TIMESTAMP_ADD({{ args[2] }}, INTERVAL {{ interval }} {{ date_part }}){% endif %}'; + templates.functions.DATE_ADD = '{% if date_part|upper in [\'YEAR\', \'MONTH\', \'QUARTER\'] %}TIMESTAMP(DATETIME_ADD(DATETIME({{ args[0] }}), INTERVAL {{ interval }} {{ date_part }})){% else %}TIMESTAMP_ADD({{ args[0] }}, INTERVAL {{ interval }} {{ date_part }}){% endif %}'; templates.functions.CURRENTDATE = 'CURRENT_DATE'; delete templates.functions.TO_CHAR; templates.expressions.binary = '{% if op == \'%\' %}MOD({{ left }}, {{ right }}){% else %}({{ left }} {{ op }} {{ right }}){% endif %}'; From 7c06958d9b305df9df0b24d6c7b17b3bf70c22e9 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Tue, 8 Apr 2025 14:05:12 +0300 Subject: [PATCH 6/6] add/fix tests --- rust/cubesql/cubesql/src/compile/mod.rs | 110 ++++++++++++++++++++---- 1 file changed, 94 insertions(+), 16 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index aee3908f2fb8f..b3840407504d3 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -14624,21 +14624,46 @@ ORDER BY "source"."str0" ASC assert!(sql.contains("EXTRACT(EPOCH FROM")); } - // redshift-dateadd-[literal-date32-]to-interval rewrites DATEADD to DATE_ADD #[tokio::test] - #[ignore] async fn test_dateadd_push_down() { if !Rewriter::sql_push_down_enabled() { return; } init_testing_logger(); - // BigQuery - let bq_templates = vec![("functions/DATE_ADD".to_string(), "{% if date_part|upper in ['YEAR', 'MONTH', 'QUARTER'] %}TIMESTAMP(DATETIME_ADD(DATETIME({{ args[2] }}), INTERVAL {{ interval }} {{ date_part }})){% else %}TIMESTAMP_ADD({{ args[2] }}, INTERVAL {{ interval }} {{ date_part }}){% endif %}".to_string())]; + // Redshift function DATEADD + let query_plan = convert_select_to_query_plan( + " + SELECT DATEADD(DAY, 7, order_date) AS d + FROM KibanaSampleDataEcommerce AS k + WHERE LOWER(customer_gender) = 'test' + GROUP BY 1 + ORDER BY 1 DESC + " + .to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await; + + let physical_plan = query_plan.as_physical_plan().await.unwrap(); + println!( + "Physical plan: {}", + displayable(physical_plan.as_ref()).indent() + ); + + let logical_plan = query_plan.as_logical_plan(); + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; + // redshift-dateadd-[literal-date32-]to-interval rewrites DATEADD to DATE_ADD + assert!(sql.contains("DATE_ADD(")); + assert!(sql.contains("INTERVAL '7 DAY')")); + + // BigQuery + Postgres DATE_ADD + DAYS + let bq_templates = vec![("functions/DATE_ADD".to_string(), "{% if date_part|upper in ['YEAR', 'MONTH', 'QUARTER'] %}TIMESTAMP(DATETIME_ADD(DATETIME({{ args[0] }}), INTERVAL {{ interval }} {{ date_part }})){% else %}TIMESTAMP_ADD({{ args[0] }}, INTERVAL {{ interval }} {{ date_part }}){% endif %}".to_string())]; let query_plan = convert_select_to_query_plan_customized( " - SELECT DATE_ADD(DAY, 7, order_date) AS d + SELECT DATE_ADD(order_date, INTERVAL '7 DAYS') AS d FROM KibanaSampleDataEcommerce AS k + WHERE LOWER(customer_gender) = 'test' GROUP BY 1 ORDER BY 1 DESC " @@ -14656,14 +14681,42 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; - println!("{}", sql); assert!(sql.contains("TIMESTAMP_ADD(")); - assert!(sql.contains("INTERVAL 7 day)")); + assert!(sql.contains("INTERVAL 7 DAY)")); + // BigQuery + Redshift DATEADD + DAYS + let bq_templates = vec![("functions/DATE_ADD".to_string(), "{% if date_part|upper in ['YEAR', 'MONTH', 'QUARTER'] %}TIMESTAMP(DATETIME_ADD(DATETIME({{ args[0] }}), INTERVAL {{ interval }} {{ date_part }})){% else %}TIMESTAMP_ADD({{ args[0] }}, INTERVAL {{ interval }} {{ date_part }}){% endif %}".to_string())]; let query_plan = convert_select_to_query_plan_customized( " - SELECT DATE_ADD(MONTH, 7, order_date) AS d + SELECT DATEADD(DAY, 7, order_date) AS d FROM KibanaSampleDataEcommerce AS k + WHERE LOWER(customer_gender) = 'test' + GROUP BY 1 + ORDER BY 1 DESC + " + .to_string(), + DatabaseProtocol::PostgreSQL, + bq_templates.clone(), + ) + .await; + + let physical_plan = query_plan.as_physical_plan().await.unwrap(); + println!( + "Physical plan: {}", + displayable(physical_plan.as_ref()).indent() + ); + + let logical_plan = query_plan.as_logical_plan(); + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; + assert!(sql.contains("TIMESTAMP_ADD(")); + assert!(sql.contains("INTERVAL 7 DAY)")); + + // BigQuery + Postgres DATE_ADD + MONTHS + let query_plan = convert_select_to_query_plan_customized( + " + SELECT DATE_ADD(order_date, INTERVAL '7 MONTHS') AS d + FROM KibanaSampleDataEcommerce AS k + WHERE LOWER(customer_gender) = 'test' GROUP BY 1 ORDER BY 1 DESC " @@ -14681,24 +14734,48 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; - println!("{}", sql); assert!(sql.contains("TIMESTAMP(DATETIME_ADD(DATETIME(")); assert!(sql.contains("INTERVAL 7 MONTH)")); - // Postgres + // BigQuery + Redshift DATEADD + MONTHS + let bq_templates = vec![("functions/DATE_ADD".to_string(), "{% if date_part|upper in ['YEAR', 'MONTH', 'QUARTER'] %}TIMESTAMP(DATETIME_ADD(DATETIME({{ args[0] }}), INTERVAL {{ interval }} {{ date_part }})){% else %}TIMESTAMP_ADD({{ args[0] }}, INTERVAL {{ interval }} {{ date_part }}){% endif %}".to_string())]; + let query_plan = convert_select_to_query_plan_customized( + " + SELECT DATEADD(MONTH, 7, order_date) AS d + FROM KibanaSampleDataEcommerce AS k + WHERE LOWER(customer_gender) = 'test' + GROUP BY 1 + ORDER BY 1 DESC + " + .to_string(), + DatabaseProtocol::PostgreSQL, + bq_templates.clone(), + ) + .await; + + let physical_plan = query_plan.as_physical_plan().await.unwrap(); + println!( + "Physical plan: {}", + displayable(physical_plan.as_ref()).indent() + ); + + let logical_plan = query_plan.as_logical_plan(); + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; + assert!(sql.contains("TIMESTAMP(DATETIME_ADD(DATETIME(")); + assert!(sql.contains("INTERVAL 7 MONTH)")); + + // Postgres DATE_ADD let query_plan = convert_select_to_query_plan_customized( " - SELECT DATE_ADD(DAY, 7, order_date) AS d + SELECT DATE_ADD(order_date, INTERVAL '7 DAYS') AS d FROM KibanaSampleDataEcommerce AS k + WHERE LOWER(customer_gender) = 'test' GROUP BY 1 ORDER BY 1 DESC " .to_string(), DatabaseProtocol::PostgreSQL, - vec![( - "functions/DATE_ADD".to_string(), - "({{ args[2] }} + \'{{ interval }} {{ date_part }}\'::interval)".to_string(), - )], + vec![], ) .await; @@ -14710,7 +14787,8 @@ ORDER BY "source"."str0" ASC let logical_plan = query_plan.as_logical_plan(); let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; - assert!(sql.contains("+ '7 day'::interval")); + assert!(sql.contains("DATE_ADD(")); + assert!(sql.contains("INTERVAL '7 DAY'")); } #[tokio::test]