diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py
index 34b951320a..c71246804e 100644
--- a/sqlglot/dialects/bigquery.py
+++ b/sqlglot/dialects/bigquery.py
@@ -242,6 +242,24 @@ def _build_datetime(args: t.List) -> exp.Func:
     return exp.TimestampFromParts.from_arg_list(args)
 
 
+def build_date_diff(args: t.List) -> exp.Expression:
+    """Build an exp.DateDiff flagged with date_part_boundary from (this, expression, unit) args."""
+    date_diff = exp.DateDiff(
+        this=seq_get(args, 0),
+        expression=seq_get(args, 1),
+        unit=seq_get(args, 2),
+        date_part_boundary=True,
+    )
+
+    # Read the unit back from the node: the TimeUnit mixin canonicalizes it in its constructor.
+    # Plain WEEK is then rewritten to WEEK(SUNDAY) to preserve the semantic in the AST.
+    parsed_unit = date_diff.args.get("unit")
+    if isinstance(parsed_unit, exp.Var) and parsed_unit.name.upper() == "WEEK":
+        date_diff.set("unit", exp.WeekStart(this=exp.var("SUNDAY")))
+
+    return date_diff
+
+
 def _build_regexp_extract(
     expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
 ) -> t.Callable[[t.List, BigQuery], E]:
@@ -564,6 +582,7 @@ class Parser(parser.Parser):
             "CONTAINS_SUBSTR": _build_contains_substring,
             "DATE": _build_date,
             "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
+            "DATE_DIFF": build_date_diff,
             "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
             "DATE_TRUNC": lambda args: exp.DateTrunc(
                 unit=seq_get(args, 1),
diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py
index 8c19388357..dd1db78ea7 100644
--- a/sqlglot/dialects/duckdb.py
+++ b/sqlglot/dialects/duckdb.py
@@ -69,6 +69,18 @@
     "\u001f": 31,
 }
 
+# Days of week to ISO 8601 day-of-week numbers
+# ISO 8601 standard: Monday=1, Tuesday=2, Wednesday=3, Thursday=4, Friday=5, Saturday=6, Sunday=7
+WEEK_START_DAY_TO_DOW = {
+    "MONDAY": 1,
+    "TUESDAY": 2,
+    "WEDNESDAY": 3,
+    "THURSDAY": 4,
+    "FRIDAY": 5,
+    "SATURDAY": 6,
+    "SUNDAY": 7,
+}
+
 
 # BigQuery -> DuckDB conversion for the DATE function
 def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
@@ -250,9 +262,86 @@
def _implicit_datetime_cast(
     return arg
 
 
+def _week_unit_to_dow(unit: t.Optional[exp.Expression]) -> t.Optional[int]:
+    """
+    Resolve the ISO 8601 day-of-week number that a week unit starts on, for DATE_DIFF('WEEK', ...)
+    coming from other dialects, e.g BigQuery's WEEK(<day>) or ISOWEEK unit parts.
+
+    Args:
+        unit: The unit expression (Var for ISOWEEK or WeekStart)
+
+    Returns:
+        The ISO 8601 day number (Monday=1, Sunday=7 etc) or None if the unit is neither ISOWEEK nor a WeekStart with a known day name.
+
+    Examples:
+        "WEEK(SUNDAY)" -> 7
+        "WEEK(MONDAY)" -> 1
+        "ISOWEEK" -> 1
+    """
+    # Handle plain Var expressions for ISOWEEK only (exact match; `in` would be a substring test)
+    if isinstance(unit, exp.Var) and unit.name.upper() == "ISOWEEK":
+        return 1
+
+    # Handle WeekStart expressions with explicit day
+    if isinstance(unit, exp.WeekStart):
+        return WEEK_START_DAY_TO_DOW.get(unit.name.upper())
+
+    return None
+
+
+def _build_week_trunc_expression(date_expr: exp.Expression, start_dow: int) -> exp.Expression:
+    """
+    Build DATE_TRUNC expression for week boundaries with custom start day.
+
+    Args:
+        date_expr: The date expression to truncate
+        start_dow: ISO 8601 day-of-week number of the week start (Monday=1, ..., Sunday=7)
+
+    DuckDB's DATE_TRUNC('WEEK', ...) aligns weeks to Monday (ISO standard).
+    To align to a different start day, we shift the date before truncating.
+
+    Shift formula: Sunday (7) gets +1, others get (1 - start_dow)
+    Examples:
+        Monday (1): shift = 0 (no shift needed)
+        Tuesday (2): shift = -1 (shift back 1 day) ...
+        Sunday (7): shift = +1 (shift forward 1 day, wraps to next Monday-based week)
+    """
+    shift_days = 1 if start_dow == 7 else 1 - start_dow
+
+    # Shift date to align week boundaries with the desired start day
+    # No shift needed for Monday-based weeks (shift_days == 0)
+    shifted_date = (
+        exp.DateAdd(
+            this=date_expr,
+            expression=exp.Interval(this=exp.Literal.string(str(shift_days)), unit=exp.var("DAY")),
+        )
+        if shift_days != 0
+        else date_expr
+    )
+
+    return exp.DateTrunc(unit=exp.var("WEEK"), this=shifted_date)
+
+
 def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
+    """Generate DATE_DIFF, truncating both dates to week starts for boundary-based week units."""
     this = _implicit_datetime_cast(expression.this)
     expr = _implicit_datetime_cast(expression.expression)
+    unit = expression.args.get("unit")
+    date_part_boundary = expression.args.get("date_part_boundary")
+
+    # DuckDB's WEEK diff does not respect Monday crossing (week boundaries), it checks (end_day - start_day) / 7:
+    #  SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-17' AS DATE)) --> 0 (Monday crossed)
+    #  SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-20' AS DATE)) --> 1 (7 days difference)
+    # Whereas for other units such as MONTH it does respect month boundaries:
+    #  SELECT DATE_DIFF('MONTH', CAST('2024-11-30' AS DATE), CAST('2024-12-01' AS DATE)) --> 1 (Month crossed)
+
+    # Extract week start day; returns None if day is dynamic (column/placeholder)
+    week_start = _week_unit_to_dow(unit)
+    if date_part_boundary and week_start and this and expr:
+        expression.set("unit", exp.Literal.string("WEEK"))
+        # Truncate both dates to week boundaries to respect input dialect semantics
+        this = _build_week_trunc_expression(this, week_start)
+        expr = _build_week_trunc_expression(expr, week_start)
 
     return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
 
diff --git a/sqlglot/expressions.py b/sqlglot/expressions.py
index 79c1eed917..833773203c 100644
--- a/sqlglot/expressions.py
+++ b/sqlglot/expressions.py
@@ -6398,7 +6398,14 @@
class DateSub(Func, IntervalOp):
 
 class DateDiff(Func, TimeUnit):
     _sql_names = ["DATEDIFF", "DATE_DIFF"]
-    arg_types = {"this": True, "expression": True, "unit": False, "zone": False, "big_int": False}
+    arg_types = {
+        "this": True,
+        "expression": True,
+        "unit": False,
+        "zone": False,
+        "big_int": False,
+        "date_part_boundary": False,
+    }
 
 
 class DateTrunc(Func):
diff --git a/sqlglot/generator.py b/sqlglot/generator.py
index 6740c93a33..ff137cd6f5 100644
--- a/sqlglot/generator.py
+++ b/sqlglot/generator.py
@@ -226,7 +226,6 @@ class Generator(metaclass=_Generator):
         exp.VarMap: lambda self, e: self.func("MAP", e.args["keys"], e.args["values"]),
         exp.ViewAttributeProperty: lambda self, e: f"WITH {self.sql(e, 'this')}",
         exp.VolatileProperty: lambda *_: "VOLATILE",
-        exp.WeekStart: lambda self, e: f"WEEK({self.sql(e, 'this')})",
         exp.WithJournalTableProperty: lambda self, e: f"WITH JOURNAL TABLE={self.sql(e, 'this')}",
         exp.WithProcedureOptions: lambda self, e: f"WITH {self.expressions(e, flat=True)}",
         exp.WithSchemaBindingProperty: lambda self, e: f"WITH SCHEMA {self.sql(e, 'this')}",
@@ -5456,3 +5455,11 @@ def localtime_sql(self, expression: exp.Localtime) -> str:
     def localtimestamp_sql(self, expression: exp.Localtime) -> str:
         this = expression.this
         return self.func("LOCALTIMESTAMP", this) if this else "LOCALTIMESTAMP"
+
+    def weekstart_sql(self, expression: exp.WeekStart) -> str:
+        """Render WEEK(<day>), or bare WEEK for SUNDAY when the dialect's WEEK_OFFSET is -1."""
+        start_day = expression.this
+        day_name = start_day.name.upper()
+        # BigQuery specific optimization since WEEK(SUNDAY) == WEEK
+        bare_week = self.dialect.WEEK_OFFSET == -1 and day_name == "SUNDAY"
+        return "WEEK" if bare_week else self.func("WEEK", start_day)
diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py
index e89694a2de..a3d16cbdca 100644
--- a/tests/dialects/test_bigquery.py
+++ b/tests/dialects/test_bigquery.py
@@ -3232,7 +3232,7 @@ def test_week(self):
         self.validate_identity("DATE_TRUNC(date, WEEK(MONDAY))")
         self.validate_identity(
             "LAST_DAY(DATETIME '2008-11-10 15:30:00', WEEK(SUNDAY))",
-
"LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK(SUNDAY))", + "LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK)", ) self.validate_identity("DATE_DIFF('2017-12-18', '2017-12-17', WEEK(SATURDAY))") self.validate_identity("DATETIME_DIFF('2017-12-18', '2017-12-17', WEEK(MONDAY))") @@ -3241,6 +3241,81 @@ def test_week(self): "EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))", ) + # BigQuery → DuckDB transpilation tests for DATE_DIFF with week units + self.validate_all( + "SELECT DATE_DIFF('2024-06-15', '2024-01-08', WEEK(MONDAY))", + write={ + "bigquery": "SELECT DATE_DIFF('2024-06-15', '2024-01-08', WEEK(MONDAY))", + "duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-06-15' AS DATE)))", + }, + ) + self.validate_all( + "SELECT DATE_DIFF('2026-01-15', '2024-01-08', WEEK(SUNDAY))", + write={ + "bigquery": "SELECT DATE_DIFF('2026-01-15', '2024-01-08', WEEK)", + "duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2026-01-15' AS DATE) + INTERVAL '1' DAY))", + }, + ) + self.validate_all( + "SELECT DATE_DIFF('2024-01-15', '2022-04-28', WEEK(SATURDAY))", + write={ + "bigquery": "SELECT DATE_DIFF('2024-01-15', '2022-04-28', WEEK(SATURDAY))", + "duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2022-04-28' AS DATE) + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '-5' DAY))", + }, + ) + self.validate_all( + "SELECT DATE_DIFF('2024-01-15', '2024-01-08', WEEK)", + write={ + "bigquery": "SELECT DATE_DIFF('2024-01-15', '2024-01-08', WEEK)", + "duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '1' DAY))", + }, + ) + # Test WEEK - Saturday to Sunday boundary (critical test for Sunday-start weeks) + # In BigQuery: Saturday -> Sunday crosses week boundary = 1 week + # Without fix: 
DuckDB treats as Monday-start weeks = 0 weeks (both in same week) + self.validate_all( + "SELECT DATE_DIFF('2024-01-07', '2024-01-06', WEEK)", + write={ + "bigquery": "SELECT DATE_DIFF('2024-01-07', '2024-01-06', WEEK)", + "duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-06' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-07' AS DATE) + INTERVAL '1' DAY))", + }, + ) + self.validate_all( + "SELECT DATE_DIFF('2024-01-15', '2024-01-08', ISOWEEK)", + write={ + "bigquery": "SELECT DATE_DIFF('2024-01-15', '2024-01-08', ISOWEEK)", + "duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE)))", + }, + ) + self.validate_all( + "SELECT DATE_DIFF(DATE '2024-09-15', DATE '2024-01-08', WEEK(MONDAY))", + write={ + "bigquery": "SELECT DATE_DIFF(CAST('2024-09-15' AS DATE), CAST('2024-01-08' AS DATE), WEEK(MONDAY))", + "duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-09-15' AS DATE)))", + }, + ) + self.validate_all( + "SELECT DATE_DIFF(DATE '2024-01-01', DATE '2024-01-15', WEEK(SUNDAY))", + write={ + "bigquery": "SELECT DATE_DIFF(CAST('2024-01-01' AS DATE), CAST('2024-01-15' AS DATE), WEEK)", + "duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-01' AS DATE) + INTERVAL '1' DAY))", + }, + ) + self.validate_all( + "SELECT DATE_DIFF(DATE '2023-05-01', DATE '2024-01-15', ISOWEEK)", + write={ + "bigquery": "SELECT DATE_DIFF(CAST('2023-05-01' AS DATE), CAST('2024-01-15' AS DATE), ISOWEEK)", + "duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE)), DATE_TRUNC('WEEK', CAST('2023-05-01' AS DATE)))", + }, + ) + self.validate_all( + "SELECT DATE_DIFF(DATE '2024-01-01', DATE '2024-01-15', DAY)", + write={ + "bigquery": "SELECT DATE_DIFF(CAST('2024-01-01' AS DATE), CAST('2024-01-15' AS DATE), DAY)", + "duckdb": 
"SELECT DATE_DIFF('DAY', CAST('2024-01-15' AS DATE), CAST('2024-01-01' AS DATE))", + }, + ) + def test_approx_qunatiles(self): self.validate_identity("APPROX_QUANTILES(foo, 2)") self.validate_identity("APPROX_QUANTILES(DISTINCT foo, 2 RESPECT NULLS)")