Skip to content

Commit 4aea018

Browse files
feat(duckdb): Add transpilation support for nanoseconds used in date/time functions. (#6617)
* feat(duckdb): Add transpilation support for nanoseconds used in date/time functions * Address review comments: remove redundant casting and fix test cases - Remove redundant TIMESTAMP_NS casting in _timediff_sql (lines 268-271) * Previously cast to TIMESTAMP_NS before calling _handle_nanosecond_diff * _handle_nanosecond_diff already handles casting via exp.cast (which avoids recasting) * Now passes expressions directly, matching _date_diff_sql pattern - Fix TIMEDIFF test case to use valid Snowflake syntax * Changed from TIME literal '10:00:00.000000000' * To TIMESTAMP with CAST: CAST('2023-01-01 10:00:00.000000000' AS TIMESTAMP) * TIME literals don't work with TIMEDIFF in Snowflake - Fix TIMEADD test case to use valid Snowflake syntax * Changed from TIME literal '10:00:00.000000000' * To TIMESTAMP with CAST: CAST('2023-01-01 10:00:00.000000000' AS TIMESTAMP) * TIME literals don't work with TIMEADD in Snowflake Addresses review comments from VaggelisD on PR #6617 * Address review comments: simplify code and improve clarity - Use expression.unit property accessor instead of expression.args.get('unit') * Updated in _timediff_sql, date_delta_to_binary_interval_op, _date_diff_sql * More concise and idiomatic - Remove unnecessary docstring from _is_nanosecond_unit * Function name is self-explanatory - Keep _unwrap_cast helper function * Necessary to avoid nested casts like CAST(CAST(x AS TIMESTAMP) AS TIMESTAMP_NS) * exp.cast only avoids recasting to SAME type, not DIFFERENT types * Example: CAST('2023-01-01' AS TIMESTAMP) → without unwrap → CAST(CAST(...) 
AS TIMESTAMP_NS) * With unwrap: extracts '2023-01-01' → CAST('2023-01-01' AS TIMESTAMP_NS) - cast parameter not needed for NANOSECOND handling * NANOSECOND operations require EPOCH_NS/make_timestamp_ns * These functions require TIMESTAMP_NS type * Must always cast regardless of cast parameter * cast parameter only applies to base implementation's interval operations Addresses review comments from georgesittas on PR #6617 * feat(duckdb): Remove the unwrapping logic and remove the corresponding tests for simplicity
1 parent fc5800d commit 4aea018

File tree

2 files changed

+132
-2
lines changed

2 files changed

+132
-2
lines changed

sqlglot/dialects/duckdb.py

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from sqlglot import exp, generator, parser, tokens, transforms
99

1010
from sqlglot.dialects.dialect import (
11+
DATETIME_DELTA,
1112
Dialect,
1213
JSON_EXTRACT_TYPE,
1314
NormalizationStrategy,
@@ -17,7 +18,7 @@
1718
bool_xor_sql,
1819
build_default_decimal_type,
1920
count_if_to_sum,
20-
date_delta_to_binary_interval_op,
21+
date_delta_to_binary_interval_op as base_date_delta_to_binary_interval_op,
2122
date_trunc_to_time,
2223
datestrtodate_sql,
2324
no_datetime_sql,
@@ -142,6 +143,42 @@ def _last_day_sql(self: DuckDB.Generator, expression: exp.LastDay) -> str:
142143
return self.function_fallback_sql(expression)
143144

144145

146+
def _is_nanosecond_unit(unit: t.Optional[exp.Expression]) -> bool:
    """Return True when `unit` is a Var or Literal whose name is NANOSECOND (any letter case)."""
    # Anything that is not a Var/Literal node (including None) cannot name a unit here.
    if not isinstance(unit, (exp.Var, exp.Literal)):
        return False

    return unit.name.upper() == "NANOSECOND"
148+
149+
150+
def _handle_nanosecond_diff(
    self: DuckDB.Generator,
    end_time: exp.Expression,
    start_time: exp.Expression,
) -> str:
    """
    Render a NANOSECOND difference as EPOCH_NS(end) - EPOCH_NS(start).

    Both operands are cast to TIMESTAMP_NS before being wrapped in EPOCH_NS; this path
    is used because DATE_DIFF does not support the NANOSECOND unit.

    Args:
        self: The generator used to render the resulting expression.
        end_time: The minuend (left-hand side of the subtraction).
        start_time: The subtrahend (right-hand side of the subtraction).

    Returns:
        The generated SQL string.
    """
    ns_type = exp.DataType.Type.TIMESTAMP_NS

    end_epoch, start_epoch = (
        exp.func("EPOCH_NS", exp.cast(operand, ns_type)) for operand in (end_time, start_time)
    )

    return self.sql(exp.Sub(this=end_epoch, expression=start_epoch))
163+
164+
165+
def _handle_nanosecond_add(
    self: DuckDB.Generator,
    timestamp: exp.Expression,
    nanoseconds: exp.Expression,
) -> str:
    """
    Render a NANOSECOND addition as make_timestamp_ns(EPOCH_NS(timestamp) + nanoseconds).

    The timestamp operand is cast to TIMESTAMP_NS first; this path is used because
    INTERVAL does not support the NANOSECOND unit.

    Args:
        self: The generator used to render the resulting expression.
        timestamp: The value the offset is applied to.
        nanoseconds: The offset expression that is added to the epoch value.

    Returns:
        The generated SQL string.
    """
    epoch_ns = exp.func("EPOCH_NS", exp.cast(timestamp, exp.DataType.Type.TIMESTAMP_NS))
    shifted_epoch = exp.Add(this=epoch_ns, expression=nanoseconds)

    return self.sql(exp.func("make_timestamp_ns", shifted_epoch))
180+
181+
145182
def _to_boolean_sql(self: DuckDB.Generator, expression: exp.ToBoolean) -> str:
146183
"""
147184
Transpile TO_BOOLEAN and TRY_TO_BOOLEAN functions from Snowflake to DuckDB equivalent.
@@ -215,6 +252,11 @@ def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
215252

216253
# BigQuery -> DuckDB conversion for the TIME_DIFF function
217254
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
255+
unit = expression.unit
256+
257+
if _is_nanosecond_unit(unit):
258+
return _handle_nanosecond_diff(self, expression.expression, expression.this)
259+
218260
this = exp.cast(expression.this, exp.DataType.Type.TIME)
219261
expr = exp.cast(expression.expression, exp.DataType.Type.TIME)
220262

@@ -223,6 +265,27 @@ def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
223265
return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
224266

225267

268+
def date_delta_to_binary_interval_op(
    cast: bool = True,
) -> t.Callable[[DuckDB.Generator, DATETIME_DELTA], str]:
    """
    DuckDB override to handle NANOSECOND operations; delegates other units to base.

    Args:
        cast: Forwarded to the base implementation. It is not consulted on the
            NANOSECOND path, which always goes through `_handle_nanosecond_add`.

    Returns:
        A generator callable taking `(self, expression)` and returning SQL.
    """
    base_impl = base_date_delta_to_binary_interval_op(cast=cast)

    def duckdb_date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
        unit = expression.unit

        # Every unit except NANOSECOND is rendered by the base implementation.
        if not _is_nanosecond_unit(unit):
            return base_impl(self, expression)

        # Handle NANOSECOND unit (DuckDB doesn't support INTERVAL ... NANOSECOND)
        interval_value = expression.expression
        if isinstance(interval_value, exp.Interval):
            interval_value = interval_value.this

        # The nanosecond helper only ever *adds* the offset. For subtraction nodes the
        # offset must therefore be negated, otherwise `x - n NANOSECOND` would be
        # generated as `x + n`.
        # NOTE(review): assumes these *Sub nodes are routed through this generator,
        # as they are for the base implementation - confirm against TRANSFORMS.
        if isinstance(
            expression, (exp.DateSub, exp.DatetimeSub, exp.TimeSub, exp.TimestampSub)
        ):
            # Parenthesize compound offsets so the negation applies to the whole value.
            if not isinstance(interval_value, (exp.Literal, exp.Column)):
                interval_value = exp.Paren(this=interval_value)
            interval_value = exp.Neg(this=interval_value)

        return _handle_nanosecond_add(self, expression.this, interval_value)

    return duckdb_date_delta_sql
287+
288+
226289
@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    """Render ARRAY_SORT over the array operand only; a comparator argument is flagged as unsupported."""
    array_operand = expression.this
    return self.func("ARRAY_SORT", array_operand)
@@ -439,9 +502,13 @@ def _build_week_trunc_expression(date_expr: exp.Expression, start_dow: int) -> e
439502

440503

441504
def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
505+
unit = expression.unit
506+
507+
if _is_nanosecond_unit(unit):
508+
return _handle_nanosecond_diff(self, expression.this, expression.expression)
509+
442510
this = _implicit_datetime_cast(expression.this)
443511
expr = _implicit_datetime_cast(expression.expression)
444-
unit = expression.args.get("unit")
445512

446513
# DuckDB's WEEK diff does not respect Monday crossing (week boundaries), it checks (end_day - start_day) / 7:
447514
# SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-17' AS DATE)) --> 0 (Monday crossed)

tests/dialects/test_snowflake.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2489,6 +2489,69 @@ def test_timestamps(self):
24892489
},
24902490
)
24912491

2492+
# Test DATEDIFF with NANOSECOND - DuckDB uses EPOCH_NS since DATE_DIFF doesn't support NANOSECOND
2493+
self.validate_all(
2494+
"DATEDIFF(NANOSECOND, '2023-01-01 10:00:00.000000000', '2023-01-01 10:00:00.123456789')",
2495+
write={
2496+
"duckdb": "EPOCH_NS(CAST('2023-01-01 10:00:00.123456789' AS TIMESTAMP_NS)) - EPOCH_NS(CAST('2023-01-01 10:00:00.000000000' AS TIMESTAMP_NS))",
2497+
"snowflake": "DATEDIFF(NANOSECOND, '2023-01-01 10:00:00.000000000', '2023-01-01 10:00:00.123456789')",
2498+
},
2499+
)
2500+
2501+
# Test DATEDIFF with NANOSECOND on columns
2502+
self.validate_all(
2503+
"DATEDIFF(NANOSECOND, start_time, end_time)",
2504+
write={
2505+
"duckdb": "EPOCH_NS(CAST(end_time AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start_time AS TIMESTAMP_NS))",
2506+
"snowflake": "DATEDIFF(NANOSECOND, start_time, end_time)",
2507+
},
2508+
)
2509+
2510+
# Test DATEADD with NANOSECOND - DuckDB uses MAKE_TIMESTAMP_NS since INTERVAL doesn't support NANOSECOND
2511+
self.validate_all(
2512+
"DATEADD(NANOSECOND, 123456789, '2023-01-01 10:00:00.000000000')",
2513+
write={
2514+
"duckdb": "MAKE_TIMESTAMP_NS(EPOCH_NS(CAST('2023-01-01 10:00:00.000000000' AS TIMESTAMP_NS)) + 123456789)",
2515+
"snowflake": "DATEADD(NANOSECOND, 123456789, '2023-01-01 10:00:00.000000000')",
2516+
},
2517+
)
2518+
2519+
# Test DATEADD with NANOSECOND on columns
2520+
self.validate_all(
2521+
"DATEADD(NANOSECOND, nano_offset, timestamp_col)",
2522+
write={
2523+
"duckdb": "MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(timestamp_col AS TIMESTAMP_NS)) + nano_offset)",
2524+
"snowflake": "DATEADD(NANOSECOND, nano_offset, timestamp_col)",
2525+
},
2526+
)
2527+
2528+
# Test negative NANOSECOND values (subtraction)
2529+
self.validate_all(
2530+
"DATEADD(NANOSECOND, -123456789, '2023-01-01 10:00:00.500000000')",
2531+
write={
2532+
"duckdb": "MAKE_TIMESTAMP_NS(EPOCH_NS(CAST('2023-01-01 10:00:00.500000000' AS TIMESTAMP_NS)) + -123456789)",
2533+
"snowflake": "DATEADD(NANOSECOND, -123456789, '2023-01-01 10:00:00.500000000')",
2534+
},
2535+
)
2536+
2537+
# Test TIMESTAMPDIFF with NANOSECOND - Snowflake parser converts to DATEDIFF
2538+
self.validate_all(
2539+
"TIMESTAMPDIFF(NANOSECOND, '2023-01-01 10:00:00.000000000', '2023-01-01 10:00:00.123456789')",
2540+
write={
2541+
"duckdb": "EPOCH_NS(CAST('2023-01-01 10:00:00.123456789' AS TIMESTAMP_NS)) - EPOCH_NS(CAST('2023-01-01 10:00:00.000000000' AS TIMESTAMP_NS))",
2542+
"snowflake": "DATEDIFF(NANOSECOND, '2023-01-01 10:00:00.000000000', '2023-01-01 10:00:00.123456789')",
2543+
},
2544+
)
2545+
2546+
# Test TIMESTAMPADD with NANOSECOND - Snowflake parser converts to DATEADD
2547+
self.validate_all(
2548+
"TIMESTAMPADD(NANOSECOND, 123456789, '2023-01-01 10:00:00.000000000')",
2549+
write={
2550+
"duckdb": "MAKE_TIMESTAMP_NS(EPOCH_NS(CAST('2023-01-01 10:00:00.000000000' AS TIMESTAMP_NS)) + 123456789)",
2551+
"snowflake": "DATEADD(NANOSECOND, 123456789, '2023-01-01 10:00:00.000000000')",
2552+
},
2553+
)
2554+
24922555
self.validate_identity("DATEADD(y, 5, x)", "DATEADD(YEAR, 5, x)")
24932556
self.validate_identity("DATEADD(y, 5, x)", "DATEADD(YEAR, 5, x)")
24942557
self.validate_identity("DATE_PART(yyy, x)", "DATE_PART(YEAR, x)")

0 commit comments

Comments
 (0)