From 45cacb624f62ad906ca83a9c805173ad90b726a4 Mon Sep 17 00:00:00 2001 From: Thomas Langton <155970791+tlangton3@users.noreply.github.com> Date: Tue, 10 Jun 2025 13:46:41 +0100 Subject: [PATCH] fix(BigqueryQuery): Prevent double-wrapping of TIMESTAMP() in SQL generation Updated the SQL generation logic to ensure that TIMESTAMP() is not wrapped in DATETIME() for BigQuery queries. Added regression tests to verify the correct behavior in both standard and SQL pushdown contexts. --- .../src/adapter/BigqueryQuery.ts | 4 +- .../test/unit/base-query.test.ts | 67 +++++++++++++++++++ rust/cubesql/cubesql/src/compile/mod.rs | 52 ++++++++++++++ 3 files changed, 121 insertions(+), 2 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts b/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts index c13e26d224f37..1bf6d7288406f 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts @@ -42,7 +42,7 @@ export class BigqueryQuery extends BaseQuery { } public convertTz(field) { - return `DATETIME(${this.timeStampCast(field)}, '${this.timezone}')`; + return `DATETIME(${field}, '${this.timezone}')`; } public timeStampCast(value) { @@ -263,7 +263,7 @@ export class BigqueryQuery extends BaseQuery { templates.expressions.binary = '{% if op == \'%\' %}MOD({{ left }}, {{ right }}){% else %}({{ left }} {{ op }} {{ right }}){% endif %}'; templates.expressions.interval = 'INTERVAL {{ interval }}'; templates.expressions.extract = 'EXTRACT({% if date_part == \'DOW\' %}DAYOFWEEK{% elif date_part == \'DOY\' %}DAYOFYEAR{% else %}{{ date_part }}{% endif %} FROM {{ expr }})'; - templates.expressions.timestamp_literal = 'DATETIME(TIMESTAMP(\'{{ value }}\'))'; + templates.expressions.timestamp_literal = 'TIMESTAMP(\'{{ value }}\')'; delete templates.expressions.ilike; delete templates.expressions.like_escape; templates.filters.like_pattern = 'CONCAT({% if start_wild %}\'%\'{% else %}\'\'{% endif %}, LOWER({{ value }}), {% if end_wild %}\'%\'{% else %}\'\'{% endif %})'; diff --git a/packages/cubejs-schema-compiler/test/unit/base-query.test.ts b/packages/cubejs-schema-compiler/test/unit/base-query.test.ts index 3fc1822f338c4..8fc796637642c 100644 --- a/packages/cubejs-schema-compiler/test/unit/base-query.test.ts +++ b/packages/cubejs-schema-compiler/test/unit/base-query.test.ts @@ -1012,6 +1012,73 @@ describe('SQL Generation', () => { }) ); + it('BigQuery timestamp literal regression test - should not double-wrap DATETIME(TIMESTAMP())', async () => { + await compilers.compiler.compile(); + + const query = new BigqueryQuery(compilers, { + measures: [ + 'cards.count' + ], + timeDimensions: [{ + dimension: 'cards.createdAt', + granularity: 'day', + dateRange: ['2025-06-03', '2025-06-10'] + }], + filters: [], + timezone: 'UTC' + }); + + const queryAndParams = query.buildSqlAndParams(); + const sql = queryAndParams[0]; + + // Should contain TIMESTAMP() but NOT DATETIME(TIMESTAMP()) + expect(sql).toContain('TIMESTAMP('); + expect(sql).not.toContain('DATETIME(TIMESTAMP('); + // Should contain correct BigQuery timestamp comparison syntax with parameters + expect(sql).toContain('TIMESTAMP(?)'); + // Should contain the properly formatted DATETIME function without double wrapping + expect(sql).toContain('DATETIME(`cards`.created_at, \'UTC\')'); + }); + + it('BigQuery SQL pushdown timestamp literal test - should not double-wrap DATETIME(TIMESTAMP())', async () => { + await compilers.compiler.compile(); + + const query = new BigqueryQuery(compilers, { + measures: [ + 'cards.count' + ], + timeDimensions: [{ + dimension: 'cards.createdAt', + granularity: 'day', + dateRange: ['2025-06-03', '2025-06-10'] + }], + filters: [{ + member: 'cards.createdAt', + operator: 'inDateRange', + values: ['2025-06-03', '2025-06-10'] + }], + timezone: 'UTC', + // Simulate SQL pushdown scenario with external query context + ungrouped: true + }); + + const queryAndParams = query.buildSqlAndParams(); + const sql = queryAndParams[0]; + + console.log('Generated SQL pushdown:', sql); + + // Should contain TIMESTAMP() but NOT DATETIME(TIMESTAMP()) in pushdown context + expect(sql).toContain('TIMESTAMP('); + expect(sql).not.toContain('DATETIME(TIMESTAMP('); + // Should contain correct BigQuery timestamp comparison syntax with parameters + expect(sql).toContain('TIMESTAMP(?)'); + // Should contain the properly formatted DATETIME function without double wrapping + expect(sql).toContain('DATETIME(`cards`.created_at, \'UTC\')'); + // Should handle date range filters correctly in pushdown context + expect(sql).toMatch(/DATETIME\(`cards`\.created_at, 'UTC'\) >= TIMESTAMP\(\?\)/); + expect(sql).toMatch(/DATETIME\(`cards`\.created_at, 'UTC'\) <= TIMESTAMP\(\?\)/); + }); + it('Test time series with 6 digits timestamp precision - bigquery', async () => { await compilers.compiler.compile(); diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index 9da6c6b5facbd..ee2bede1091da 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -16970,4 +16970,56 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), displayable(physical_plan.as_ref()).indent() ); } + + #[tokio::test] + async fn test_bigquery_timestamp_literal_regression() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let bq_templates = vec![ + ("expressions/timestamp_literal".to_string(), "TIMESTAMP('{{ value }}')".to_string()), + ]; + + let query_plan = convert_select_to_query_plan_customized( + r#" + SELECT COUNT(*) AS count_orders, LOWER(customer_gender) AS gender + FROM KibanaSampleDataEcommerce + WHERE order_date >= TIMESTAMP '2025-06-03 00:00:00' + AND order_date <= TIMESTAMP '2025-06-10 23:59:59' + AND LOWER(customer_gender) = 'test' + GROUP BY LOWER(customer_gender) + ORDER BY 1 DESC + "# + .to_string(), + DatabaseProtocol::PostgreSQL, + bq_templates, + ) + .await; + + let logical_plan = query_plan.as_logical_plan(); + let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; + + println!("Generated BigQuery SQL: {}", sql); + + // Should contain TIMESTAMP() but NOT DATETIME(TIMESTAMP()) + assert!(sql.contains("TIMESTAMP(")); + assert!(!sql.contains("DATETIME(TIMESTAMP(")); + + // Should contain correct BigQuery timestamp literal syntax + assert!(sql.contains("TIMESTAMP('2025-06-03T00:00:00.000Z')")); + assert!(sql.contains("TIMESTAMP('2025-06-10T23:59:59.000Z')")); + + // The key test: ensure we don't have the double-wrapping issue + // In the broken version, BigQuery would generate DATETIME(TIMESTAMP()) instead of just TIMESTAMP() + // Our fix ensures timestamp literals are not double-wrapped + assert!(!sql.contains("DATETIME(TIMESTAMP(")); + + let physical_plan = query_plan.as_physical_plan().await.unwrap(); + println!( + "Physical plan: {}", + displayable(physical_plan.as_ref()).indent() + ); + } }