-
Notifications
You must be signed in to change notification settings - Fork 1k
feat(duckdb): Add transpilation support for nanoseconds used in date/time functions. #6617
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
1fa600a
c8a9ce5
6555304
5dc6181
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ | |
| from sqlglot import exp, generator, parser, tokens, transforms | ||
|
|
||
| from sqlglot.dialects.dialect import ( | ||
| DATETIME_DELTA, | ||
| Dialect, | ||
| JSON_EXTRACT_TYPE, | ||
| NormalizationStrategy, | ||
|
|
@@ -17,7 +18,7 @@ | |
| bool_xor_sql, | ||
| build_default_decimal_type, | ||
| count_if_to_sum, | ||
| date_delta_to_binary_interval_op, | ||
| date_delta_to_binary_interval_op as base_date_delta_to_binary_interval_op, | ||
| date_trunc_to_time, | ||
| datestrtodate_sql, | ||
| no_datetime_sql, | ||
|
|
@@ -142,6 +143,58 @@ def _last_day_sql(self: DuckDB.Generator, expression: exp.LastDay) -> str: | |
| return self.function_fallback_sql(expression) | ||
|
|
||
|
|
||
def _unwrap_cast(expr: exp.Expression) -> exp.Expression:
    """Strip one plain CAST layer so that a follow-up cast does not nest.

    While exp.cast avoids recasting to the SAME type, it doesn't unwrap casts to
    DIFFERENT types. Returning the inner operand before casting avoids nested
    casts like CAST(CAST(x AS TIMESTAMP) AS TIMESTAMP_NS).

    Example:
        Input:          CAST('2023-01-01' AS TIMESTAMP)
        Without unwrap: CAST(CAST('2023-01-01' AS TIMESTAMP) AS TIMESTAMP_NS)
        With unwrap:    CAST('2023-01-01' AS TIMESTAMP_NS)

    TRY_CAST nodes are deliberately left intact: exp.TryCast subclasses exp.Cast,
    and dropping it would replace a NULL-on-failure conversion with a raising one.

    Args:
        expr: Expression that is about to be cast to another type.

    Returns:
        ``expr.this`` when ``expr`` is a plain ``exp.Cast``, otherwise ``expr`` unchanged.
    """
    # Only a strict CAST is safe to peel off; TRY_CAST carries error-handling semantics.
    if isinstance(expr, exp.Cast) and not isinstance(expr, exp.TryCast):
        return expr.this
    return expr
|
|
||
|
|
||
def _is_nanosecond_unit(unit: t.Optional[exp.Expression]) -> bool:
    """Tell whether ``unit`` is a Var or Literal whose name is NANOSECOND (any letter case).

    Args:
        unit: The unit node of a date/time expression, or None when absent.

    Returns:
        True only for ``exp.Var`` / ``exp.Literal`` nodes named "NANOSECOND".
    """
    # Anything that is not a Var/Literal (including None) cannot name a unit here.
    if not isinstance(unit, (exp.Var, exp.Literal)):
        return False
    return unit.name.upper() == "NANOSECOND"
|
|
||
|
|
||
def _handle_nanosecond_diff(
    self: DuckDB.Generator,
    end_time: exp.Expression,
    start_time: exp.Expression,
) -> str:
    """Render a NANOSECOND difference as EPOCH_NS(end) - EPOCH_NS(start).

    Used because DATE_DIFF doesn't support the NANOSECOND unit.

    Args:
        end_time: Expression for the later operand (left side of the subtraction).
        start_time: Expression for the earlier operand (right side of the subtraction).

    Returns:
        The generated SQL string.
    """

    def epoch_ns(operand: exp.Expression) -> exp.Expression:
        # Cast to TIMESTAMP_NS (peeling an existing cast first) and wrap in EPOCH_NS.
        as_ns = exp.cast(_unwrap_cast(operand), exp.DataType.Type.TIMESTAMP_NS)
        return exp.func("EPOCH_NS", as_ns)

    difference = exp.Sub(this=epoch_ns(end_time), expression=epoch_ns(start_time))
    return self.sql(difference)
|
|
||
|
|
||
def _handle_nanosecond_add(
    self: DuckDB.Generator,
    timestamp: exp.Expression,
    nanoseconds: exp.Expression,
) -> str:
    """Render a NANOSECOND addition as make_timestamp_ns(EPOCH_NS(timestamp) + nanoseconds).

    Used because INTERVAL doesn't support the NANOSECOND unit.

    Args:
        timestamp: Expression being shifted; it is cast to TIMESTAMP_NS first.
        nanoseconds: Expression holding the number of nanoseconds to add.

    Returns:
        The generated SQL string.
    """
    source_ns = exp.cast(_unwrap_cast(timestamp), exp.DataType.Type.TIMESTAMP_NS)
    shifted_epoch = exp.Add(this=exp.func("EPOCH_NS", source_ns), expression=nanoseconds)
    return self.sql(exp.func("make_timestamp_ns", shifted_epoch))
|
|
||
|
|
||
| def _to_boolean_sql(self: DuckDB.Generator, expression: exp.ToBoolean) -> str: | ||
| """ | ||
| Transpile TO_BOOLEAN and TRY_TO_BOOLEAN functions from Snowflake to DuckDB equivalent. | ||
|
|
@@ -215,6 +268,11 @@ def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str: | |
|
|
||
| # BigQuery -> DuckDB conversion for the TIME_DIFF function | ||
| def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str: | ||
| unit = expression.args.get("unit") | ||
georgesittas marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| if _is_nanosecond_unit(unit): | ||
| return _handle_nanosecond_diff(self, expression.expression, expression.this) | ||
|
|
||
| this = exp.cast(expression.this, exp.DataType.Type.TIME) | ||
| expr = exp.cast(expression.expression, exp.DataType.Type.TIME) | ||
|
|
||
|
|
@@ -223,6 +281,27 @@ def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str: | |
| return self.func("DATE_DIFF", unit_to_str(expression), expr, this) | ||
|
|
||
|
|
||
def date_delta_to_binary_interval_op(
    cast: bool = True,
) -> t.Callable[[DuckDB.Generator, DATETIME_DELTA], str]:
    """DuckDB override to handle NANOSECOND operations; delegates other units to base.

    Args:
        cast: Forwarded unchanged to the base ``date_delta_to_binary_interval_op`` factory.

    Returns:
        A generator callback that renders date/time add and subtract expressions.
        NANOSECOND deltas are rewritten through EPOCH_NS / make_timestamp_ns;
        every other unit is rendered by the base implementation.
    """
    base_impl = base_date_delta_to_binary_interval_op(cast=cast)

    # Delta expressions that subtract rather than add. The nanosecond rewrite only
    # knows how to add, so for these the amount must be negated first.
    subtraction_types = (exp.DateSub, exp.DatetimeSub, exp.TimeSub, exp.TimestampSub)

    def duckdb_date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
        unit = expression.args.get("unit")

        if not _is_nanosecond_unit(unit):
            return base_impl(self, expression)

        # Handle NANOSECOND unit (DuckDB doesn't support INTERVAL ... NANOSECOND)
        interval_value = expression.expression
        if isinstance(interval_value, exp.Interval):
            interval_value = interval_value.this

        if interval_value is not None and isinstance(expression, subtraction_types):
            # Parenthesize so that negating a compound amount (e.g. a + b) stays correct.
            interval_value = exp.Neg(this=exp.Paren(this=interval_value))

        return _handle_nanosecond_add(self, expression.this, interval_value)

    return duckdb_date_delta_sql
|
|
||
|
|
||
@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    """Render ARRAY_SORT with only the array operand.

    The "expression" argument (the comparator) is declared unsupported via the
    decorator and is not passed through to the generated call.
    """
    array_operand = expression.this
    return self.func("ARRAY_SORT", array_operand)
|
|
@@ -439,9 +518,13 @@ def _build_week_trunc_expression(date_expr: exp.Expression, start_dow: int) -> e | |
|
|
||
|
|
||
| def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str: | ||
| unit = expression.args.get("unit") | ||
georgesittas marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| if _is_nanosecond_unit(unit): | ||
| return _handle_nanosecond_diff(self, expression.this, expression.expression) | ||
|
Comment on lines
+507
to
+508
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [Nit] We can inline the implementations of both |
||
|
|
||
| this = _implicit_datetime_cast(expression.this) | ||
| expr = _implicit_datetime_cast(expression.expression) | ||
| unit = expression.args.get("unit") | ||
|
|
||
| # DuckDB's WEEK diff does not respect Monday crossing (week boundaries), it checks (end_day - start_day) / 7: | ||
| # SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-17' AS DATE)) --> 0 (Monday crossed) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.