Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions sqlglot/dialects/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,24 @@ def _build_datetime(args: t.List) -> exp.Func:
return exp.TimestampFromParts.from_arg_list(args)


def build_date_diff(args: t.List) -> exp.Expression:
    """
    Build an exp.DateDiff from the positional arguments of a DATE_DIFF call.

    The node is flagged with `date_part_boundary=True`, and a plain WEEK unit is
    rewritten to WEEK(SUNDAY) so that the week start day is explicit in the AST,
    which facilitates transpilation.

    Args:
        args: The parsed arguments, read positionally as (this, expression, unit).

    Returns:
        The constructed exp.DateDiff node.
    """
    date_diff = exp.DateDiff(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        unit=seq_get(args, 2),
        date_part_boundary=True,
    )

    # The unit is inspected only after construction because the TimeUnit mixin
    # performs its own canonicalizations in the constructor
    parsed_unit = date_diff.args.get("unit")
    is_plain_week = isinstance(parsed_unit, exp.Var) and parsed_unit.name.upper() == "WEEK"

    if not is_plain_week:
        return date_diff

    # Normalize plain WEEK to WEEK(SUNDAY) to preserve the semantic in the AST
    date_diff.set("unit", exp.WeekStart(this=exp.var("SUNDAY")))
    return date_diff


def _build_regexp_extract(
expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
) -> t.Callable[[t.List, BigQuery], E]:
Expand Down Expand Up @@ -564,6 +582,7 @@ class Parser(parser.Parser):
"CONTAINS_SUBSTR": _build_contains_substring,
"DATE": _build_date,
"DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
"DATE_DIFF": build_date_diff,
"DATE_SUB": build_date_delta_with_interval(exp.DateSub),
"DATE_TRUNC": lambda args: exp.DateTrunc(
unit=seq_get(args, 1),
Expand Down
89 changes: 89 additions & 0 deletions sqlglot/dialects/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,18 @@
"\u001f": 31,
}

# Upper-case day name -> ISO 8601 day-of-week number.
# ISO 8601 numbers the days from MONDAY (1) through SUNDAY (7), so the names
# are listed in week order and numbered starting at 1.
WEEK_START_DAY_TO_DOW = {
    day_name: iso_dow
    for iso_dow, day_name in enumerate(
        ("MONDAY", "TUESDAY", "WEDNESDAY", "THURSDAY", "FRIDAY", "SATURDAY", "SUNDAY"),
        start=1,
    )
}


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
Expand Down Expand Up @@ -250,9 +262,86 @@ def _implicit_datetime_cast(
return arg


def _week_unit_to_dow(unit: t.Optional[exp.Expression]) -> t.Optional[int]:
    """
    Resolve the day on which weeks start for a week-like DATE_DIFF unit coming
    from other dialects, e.g. BigQuery's WEEK(<day>) or ISOWEEK unit parts.

    Args:
        unit: The unit expression (Var for ISOWEEK, or WeekStart).

    Returns:
        The ISO 8601 day number (Monday=1, ..., Sunday=7), or None if the unit is
        not one of the supported week units or if the WeekStart day name is not a
        known constant (e.g. it is dynamic).

    Examples:
        "WEEK(SUNDAY)" -> 7
        "WEEK(MONDAY)" -> 1
        "ISOWEEK" -> 1
        "WEEK" (plain Var) -> None
    """
    # Handle plain Var expressions for ISOWEEK only. This must be an exact match:
    # a substring test (`in "ISOWEEK"`) would also accept "WEEK", "ISO" or "".
    if isinstance(unit, exp.Var) and unit.name.upper() == "ISOWEEK":
        return 1

    # Handle WeekStart expressions with an explicit day; unknown names map to None
    if isinstance(unit, exp.WeekStart):
        return WEEK_START_DAY_TO_DOW.get(unit.name.upper())

    return None


def _build_week_trunc_expression(date_expr: exp.Expression, start_dow: int) -> exp.Expression:
    """
    Build a DATE_TRUNC('WEEK', ...) expression whose weeks start on a custom day.

    DuckDB's DATE_TRUNC('WEEK', ...) aligns weeks to Monday (ISO standard). To
    align to a different start day, the date is shifted by a whole number of days
    before it is truncated.

    Args:
        date_expr: The date expression to truncate.
        start_dow: ISO 8601 day-of-week number of the week start
            (Monday=1, ..., Sunday=7).

    Returns:
        An exp.DateTrunc with unit WEEK over the (possibly shifted) date.

    Shift formula: Sunday (7) gets +1, every other day gets (1 - start_dow).
    Examples:
        Monday (1): shift = 0 (no shift needed)
        Tuesday (2): shift = -1 (shift back 1 day) ...
        Sunday (7): shift = +1 (shift forward 1 day, wraps to next Monday-based week)
    """
    day_offset = 1 - start_dow if start_dow != 7 else 1

    if day_offset == 0:
        # Monday-based weeks already match DuckDB's truncation; use the date as is
        week_input = date_expr
    else:
        # Move the date so that the requested start day lands on a Monday
        offset_interval = exp.Interval(
            this=exp.Literal.string(str(day_offset)), unit=exp.var("DAY")
        )
        week_input = exp.DateAdd(this=date_expr, expression=offset_interval)

    return exp.DateTrunc(unit=exp.var("WEEK"), this=week_input)


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    """
    Render an exp.DateDiff as DuckDB's DATE_DIFF(<unit>, <expression>, <this>).

    When the node carries `date_part_boundary` and its unit resolves to a week
    start day, both operands are truncated to week boundaries first and the unit
    becomes the string 'WEEK', so the result counts crossed week boundaries.
    """
    minuend = _implicit_datetime_cast(expression.this)
    subtrahend = _implicit_datetime_cast(expression.expression)

    # DuckDB's WEEK diff does not respect Monday crossing (week boundaries), it checks (end_day - start_day) / 7:
    #  SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-17' AS DATE)) --> 0 (Monday crossed)
    #  SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-20' AS DATE)) --> 1 (7 days difference)
    # Whereas for other units such as MONTH it does respect month boundaries:
    #  SELECT DATE_DIFF('MONTH', CAST('2024-11-30' AS DATE), CAST('2024-12-01' AS DATE)) --> 1 (Month crossed)
    boundary_semantics = expression.args.get("date_part_boundary")

    # Week start day of the unit; None means there is no usable constant start day
    # (e.g. the day is dynamic, such as a column or placeholder)
    start_dow = _week_unit_to_dow(expression.args.get("unit"))

    if boundary_semantics and start_dow and minuend and subtrahend:
        # NOTE: this rewrites the unit on `expression` itself (in place)
        expression.set("unit", exp.Literal.string("WEEK"))

        # Truncate both dates to week boundaries to respect input dialect semantics
        minuend = _build_week_trunc_expression(minuend, start_dow)
        subtrahend = _build_week_trunc_expression(subtrahend, start_dow)

    return self.func("DATE_DIFF", unit_to_str(expression), subtrahend, minuend)

Expand Down
9 changes: 8 additions & 1 deletion sqlglot/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6398,7 +6398,14 @@ class DateSub(Func, IntervalOp):

class DateDiff(Func, TimeUnit):
    # SQL function names associated with this expression
    _sql_names = ["DATEDIFF", "DATE_DIFF"]

    # Argument name -> whether it is required. `arg_types` is defined once here;
    # an earlier assignment without `date_part_boundary` was immediately
    # overwritten by this one and has been dropped.
    # `date_part_boundary` is an optional flag a dialect parser can set on the node
    # and a generator can read back from `args`.
    arg_types = {
        "this": True,
        "expression": True,
        "unit": False,
        "zone": False,
        "big_int": False,
        "date_part_boundary": False,
    }


class DateTrunc(Func):
Expand Down
9 changes: 8 additions & 1 deletion sqlglot/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,6 @@ class Generator(metaclass=_Generator):
exp.VarMap: lambda self, e: self.func("MAP", e.args["keys"], e.args["values"]),
exp.ViewAttributeProperty: lambda self, e: f"WITH {self.sql(e, 'this')}",
exp.VolatileProperty: lambda *_: "VOLATILE",
exp.WeekStart: lambda self, e: f"WEEK({self.sql(e, 'this')})",
exp.WithJournalTableProperty: lambda self, e: f"WITH JOURNAL TABLE={self.sql(e, 'this')}",
exp.WithProcedureOptions: lambda self, e: f"WITH {self.expressions(e, flat=True)}",
exp.WithSchemaBindingProperty: lambda self, e: f"WITH SCHEMA {self.sql(e, 'this')}",
Expand Down Expand Up @@ -5456,3 +5455,11 @@ def localtime_sql(self, expression: exp.Localtime) -> str:
def localtimestamp_sql(self, expression: exp.Localtime) -> str:
    """
    Render LOCALTIMESTAMP: as a function call when the expression has a truthy
    `this` argument, otherwise as the bare keyword.
    """
    # NOTE(review): the annotation says exp.Localtime; confirm whether a dedicated
    # LOCALTIMESTAMP expression type was intended
    argument = expression.this
    if argument:
        return self.func("LOCALTIMESTAMP", argument)
    return "LOCALTIMESTAMP"

def weekstart_sql(self, expression: exp.WeekStart) -> str:
    """
    Render a WeekStart unit as WEEK(<day>), or as plain WEEK when the day is
    SUNDAY (compared case-insensitively) and the dialect's WEEK_OFFSET is -1.
    """
    start_day = expression.this
    day_name = start_day.name.upper()

    # BigQuery specific optimization since WEEK(SUNDAY) == WEEK
    sunday_is_implicit = self.dialect.WEEK_OFFSET == -1 and day_name == "SUNDAY"

    return "WEEK" if sunday_is_implicit else self.func("WEEK", start_day)
77 changes: 76 additions & 1 deletion tests/dialects/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -3232,7 +3232,7 @@ def test_week(self):
self.validate_identity("DATE_TRUNC(date, WEEK(MONDAY))")
self.validate_identity(
"LAST_DAY(DATETIME '2008-11-10 15:30:00', WEEK(SUNDAY))",
"LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK(SUNDAY))",
"LAST_DAY(CAST('2008-11-10 15:30:00' AS DATETIME), WEEK)",
)
self.validate_identity("DATE_DIFF('2017-12-18', '2017-12-17', WEEK(SATURDAY))")
self.validate_identity("DATETIME_DIFF('2017-12-18', '2017-12-17', WEEK(MONDAY))")
Expand All @@ -3241,6 +3241,81 @@ def test_week(self):
"EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))",
)

# BigQuery → DuckDB transpilation tests for DATE_DIFF with week units
self.validate_all(
"SELECT DATE_DIFF('2024-06-15', '2024-01-08', WEEK(MONDAY))",
write={
"bigquery": "SELECT DATE_DIFF('2024-06-15', '2024-01-08', WEEK(MONDAY))",
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-06-15' AS DATE)))",
},
)
self.validate_all(
"SELECT DATE_DIFF('2026-01-15', '2024-01-08', WEEK(SUNDAY))",
write={
"bigquery": "SELECT DATE_DIFF('2026-01-15', '2024-01-08', WEEK)",
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2026-01-15' AS DATE) + INTERVAL '1' DAY))",
},
)
self.validate_all(
"SELECT DATE_DIFF('2024-01-15', '2022-04-28', WEEK(SATURDAY))",
write={
"bigquery": "SELECT DATE_DIFF('2024-01-15', '2022-04-28', WEEK(SATURDAY))",
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2022-04-28' AS DATE) + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '-5' DAY))",
},
)
self.validate_all(
"SELECT DATE_DIFF('2024-01-15', '2024-01-08', WEEK)",
write={
"bigquery": "SELECT DATE_DIFF('2024-01-15', '2024-01-08', WEEK)",
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '1' DAY))",
},
)
# Test WEEK - Saturday to Sunday boundary (critical test for Sunday-start weeks)
# In BigQuery: Saturday -> Sunday crosses week boundary = 1 week
# Without fix: DuckDB treats as Monday-start weeks = 0 weeks (both in same week)
self.validate_all(
"SELECT DATE_DIFF('2024-01-07', '2024-01-06', WEEK)",
write={
"bigquery": "SELECT DATE_DIFF('2024-01-07', '2024-01-06', WEEK)",
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-06' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-07' AS DATE) + INTERVAL '1' DAY))",
},
)
self.validate_all(
"SELECT DATE_DIFF('2024-01-15', '2024-01-08', ISOWEEK)",
write={
"bigquery": "SELECT DATE_DIFF('2024-01-15', '2024-01-08', ISOWEEK)",
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE)))",
},
)
self.validate_all(
"SELECT DATE_DIFF(DATE '2024-09-15', DATE '2024-01-08', WEEK(MONDAY))",
write={
"bigquery": "SELECT DATE_DIFF(CAST('2024-09-15' AS DATE), CAST('2024-01-08' AS DATE), WEEK(MONDAY))",
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-09-15' AS DATE)))",
},
)
self.validate_all(
"SELECT DATE_DIFF(DATE '2024-01-01', DATE '2024-01-15', WEEK(SUNDAY))",
write={
"bigquery": "SELECT DATE_DIFF(CAST('2024-01-01' AS DATE), CAST('2024-01-15' AS DATE), WEEK)",
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-01' AS DATE) + INTERVAL '1' DAY))",
},
)
self.validate_all(
"SELECT DATE_DIFF(DATE '2023-05-01', DATE '2024-01-15', ISOWEEK)",
write={
"bigquery": "SELECT DATE_DIFF(CAST('2023-05-01' AS DATE), CAST('2024-01-15' AS DATE), ISOWEEK)",
"duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE)), DATE_TRUNC('WEEK', CAST('2023-05-01' AS DATE)))",
},
)
self.validate_all(
"SELECT DATE_DIFF(DATE '2024-01-01', DATE '2024-01-15', DAY)",
write={
"bigquery": "SELECT DATE_DIFF(CAST('2024-01-01' AS DATE), CAST('2024-01-15' AS DATE), DAY)",
"duckdb": "SELECT DATE_DIFF('DAY', CAST('2024-01-15' AS DATE), CAST('2024-01-01' AS DATE))",
},
)

def test_approx_qunatiles(self):
self.validate_identity("APPROX_QUANTILES(foo, 2)")
self.validate_identity("APPROX_QUANTILES(DISTINCT foo, 2 RESPECT NULLS)")
Expand Down