88from sqlglot import exp , generator , parser , tokens , transforms
99
1010from sqlglot .dialects .dialect import (
11+ DATETIME_DELTA ,
1112 Dialect ,
1213 JSON_EXTRACT_TYPE ,
1314 NormalizationStrategy ,
# Module-level sqlglot numeric literal with value 32768.
# NOTE(review): presumably the exclusive upper bound for bitmap bit positions
# (going by the name) - confirm against the code that consumes it.
MAX_BIT_POSITION = exp.Literal.number(32768)
9091
9192
def _last_day_sql(self: DuckDB.Generator, expression: exp.LastDay) -> str:
    """
    Render LAST_DAY for DuckDB, whose native LAST_DAY only covers months.

    MONTH (or a missing unit) is passed straight through to LAST_DAY. YEAR, QUARTER
    and WEEK are rebuilt from EXTRACT, MAKE_DATE and interval arithmetic. Any other
    unit is reported via `self.unsupported` and rendered with the generic function
    fallback.
    """
    date_expr = expression.this
    unit = expression.text("unit")
    # Normalise once; an absent unit behaves like MONTH.
    part = unit.upper() if unit else "MONTH"

    if part == "MONTH":
        # Native DuckDB behaviour is already correct here.
        return self.func("LAST_DAY", date_expr)

    if part == "YEAR":
        # December 31st of the same year.
        year_of_date = exp.func("EXTRACT", "YEAR", date_expr)
        december_31st = exp.func(
            "MAKE_DATE", year_of_date, exp.Literal.number(12), exp.Literal.number(31)
        )
        return self.sql(december_31st)

    if part == "QUARTER":
        year_of_date = exp.func("EXTRACT", "YEAR", date_expr)
        quarter_of_date = exp.func("EXTRACT", "QUARTER", date_expr)

        # The closing month of quarter q (1..4) is q * 3.
        closing_month = exp.Mul(this=quarter_of_date, expression=exp.Literal.number(3))
        closing_month_start = exp.func(
            "MAKE_DATE", year_of_date, closing_month, exp.Literal.number(1)
        )

        # LAST_DAY of that month's first day is the last day of the quarter.
        return self.sql(exp.func("LAST_DAY", closing_month_start))

    if part == "WEEK":
        # DuckDB DAYOFWEEK: Sunday=0, Monday=1, ..., Saturday=6.
        day_of_week = exp.func("EXTRACT", "DAYOFWEEK", date_expr)
        # Distance to the week's last day, taken to be Sunday (Snowflake):
        # (7 - dayofweek) % 7. The subtraction is parenthesised so it binds before %.
        remaining_days = exp.Mod(
            this=exp.Paren(this=exp.Sub(this=exp.Literal.number(7), expression=day_of_week)),
            expression=exp.Literal.number(7),
        )
        offset = exp.Interval(this=remaining_days, unit=exp.var("DAY"))
        shifted = exp.Add(this=date_expr, expression=offset)
        return self.sql(exp.cast(shifted, exp.DataType.Type.DATE))

    self.unsupported(f"Unsupported date part '{unit}' in LAST_DAY function")
    return self.function_fallback_sql(expression)
144+
145+
def _is_nanosecond_unit(unit: t.Optional[exp.Expression]) -> bool:
    """Return True when `unit` is a Var or Literal named NANOSECOND (case-insensitive)."""
    if not isinstance(unit, (exp.Var, exp.Literal)):
        # Covers None as well as any other expression type.
        return False
    return unit.name.upper() == "NANOSECOND"
148+
149+
def _handle_nanosecond_diff(
    self: DuckDB.Generator,
    end_time: exp.Expression,
    start_time: exp.Expression,
) -> str:
    """
    Render a NANOSECOND difference as EPOCH_NS(end) - EPOCH_NS(start).

    DATE_DIFF does not support the NANOSECOND unit, so both operands are cast to
    TIMESTAMP_NS and their epoch-nanosecond values are subtracted instead.
    """
    ns_type = exp.DataType.Type.TIMESTAMP_NS
    end_epoch = exp.func("EPOCH_NS", exp.cast(end_time, ns_type))
    start_epoch = exp.func("EPOCH_NS", exp.cast(start_time, ns_type))

    difference = exp.Sub(this=end_epoch, expression=start_epoch)
    return self.sql(difference)
163+
164+
92165def _to_boolean_sql (self : DuckDB .Generator , expression : exp .ToBoolean ) -> str :
93166 """
94167 Transpile TO_BOOLEAN and TRY_TO_BOOLEAN functions from Snowflake to DuckDB equivalent.
@@ -162,6 +235,11 @@ def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
162235
163236# BigQuery -> DuckDB conversion for the TIME_DIFF function
164237def _timediff_sql (self : DuckDB .Generator , expression : exp .TimeDiff ) -> str :
238+ unit = expression .unit
239+
240+ if _is_nanosecond_unit (unit ):
241+ return _handle_nanosecond_diff (self , expression .expression , expression .this )
242+
165243 this = exp .cast (expression .this , exp .DataType .Type .TIME )
166244 expr = exp .cast (expression .expression , exp .DataType .Type .TIME )
167245
@@ -170,6 +248,35 @@ def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
170248 return self .func ("DATE_DIFF" , unit_to_str (expression ), expr , this )
171249
172250
def _date_delta_to_binary_interval_op(
    cast: bool = True,
) -> t.Callable[[DuckDB.Generator, DATETIME_DELTA], str]:
    """
    Build the DuckDB generator callback for date/time delta expressions.

    DuckDB doesn't support INTERVAL ... NANOSECOND, so NANOSECOND deltas are rewritten
    as integer arithmetic on EPOCH_NS and converted back with MAKE_TIMESTAMP_NS. Every
    other unit is delegated to the shared `date_delta_to_binary_interval_op`.

    Args:
        cast: Forwarded unchanged to `date_delta_to_binary_interval_op`.

    Returns:
        A callable taking the generator and the delta expression and returning SQL.
    """
    base_impl = date_delta_to_binary_interval_op(cast=cast)

    # This callback is registered for both the *Add and *Sub expression families, so
    # the nanosecond rewrite must choose the arithmetic operator from the node type.
    subtraction_types = (exp.DateSub, exp.DatetimeSub, exp.TimeSub, exp.TimestampSub)

    def _duckdb_date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
        if not _is_nanosecond_unit(expression.unit):
            return base_impl(self, expression)

        # The delta may arrive wrapped in an INTERVAL; only its magnitude is needed.
        delta = expression.expression
        if isinstance(delta, exp.Interval):
            delta = delta.this

        timestamp_ns = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP_NS)
        epoch_ns = exp.func("EPOCH_NS", timestamp_ns)

        if isinstance(expression, subtraction_types):
            # Parenthesise compound deltas so `x - (a + b)` is not rendered as `x - a + b`.
            if isinstance(delta, exp.Binary):
                delta = exp.Paren(this=delta)
            shifted: exp.Expression = exp.Sub(this=epoch_ns, expression=delta)
        else:
            shifted = exp.Add(this=epoch_ns, expression=delta)

        return self.sql(exp.func("MAKE_TIMESTAMP_NS", shifted))

    return _duckdb_date_delta_sql
278+
279+
@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    """Render ARRAY_SORT with only the array operand; the comparator arg is not emitted."""
    array_operand = expression.this
    return self.func("ARRAY_SORT", array_operand)
@@ -386,9 +493,13 @@ def _build_week_trunc_expression(date_expr: exp.Expression, start_dow: int) -> e
386493
387494
388495def _date_diff_sql (self : DuckDB .Generator , expression : exp .DateDiff ) -> str :
496+ unit = expression .unit
497+
498+ if _is_nanosecond_unit (unit ):
499+ return _handle_nanosecond_diff (self , expression .this , expression .expression )
500+
389501 this = _implicit_datetime_cast (expression .this )
390502 expr = _implicit_datetime_cast (expression .expression )
391- unit = expression .args .get ("unit" )
392503
393504 # DuckDB's WEEK diff does not respect Monday crossing (week boundaries), it checks (end_day - start_day) / 7:
394505 # SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-17' AS DATE)) --> 0 (Monday crossed)
@@ -1200,15 +1311,15 @@ class Generator(generator.Generator):
12001311 ),
12011312 exp .DataType : _datatype_sql ,
12021313 exp .Date : _date_sql ,
1203- exp .DateAdd : date_delta_to_binary_interval_op (),
1314+ exp .DateAdd : _date_delta_to_binary_interval_op (),
12041315 exp .DateFromParts : rename_func ("MAKE_DATE" ),
1205- exp .DateSub : date_delta_to_binary_interval_op (),
1316+ exp .DateSub : _date_delta_to_binary_interval_op (),
12061317 exp .DateDiff : _date_diff_sql ,
12071318 exp .DateStrToDate : datestrtodate_sql ,
12081319 exp .Datetime : no_datetime_sql ,
12091320 exp .DatetimeDiff : _date_diff_sql ,
1210- exp .DatetimeSub : date_delta_to_binary_interval_op (),
1211- exp .DatetimeAdd : date_delta_to_binary_interval_op (),
1321+ exp .DatetimeSub : _date_delta_to_binary_interval_op (),
1322+ exp .DatetimeAdd : _date_delta_to_binary_interval_op (),
12121323 exp .DateToDi : lambda self ,
12131324 e : f"CAST(STRFTIME({ self .sql (e , 'this' )} , { DuckDB .DATEINT_FORMAT } ) AS INT)" ,
12141325 exp .Decode : lambda self , e : encode_decode_sql (self , e , "DECODE" , replace = False ),
@@ -1272,16 +1383,16 @@ class Generator(generator.Generator):
12721383 ),
12731384 exp .Struct : _struct_sql ,
12741385 exp .Transform : rename_func ("LIST_TRANSFORM" ),
1275- exp .TimeAdd : date_delta_to_binary_interval_op (),
1276- exp .TimeSub : date_delta_to_binary_interval_op (),
1386+ exp .TimeAdd : _date_delta_to_binary_interval_op (),
1387+ exp .TimeSub : _date_delta_to_binary_interval_op (),
12771388 exp .Time : no_time_sql ,
12781389 exp .TimeDiff : _timediff_sql ,
12791390 exp .Timestamp : no_timestamp_sql ,
1280- exp .TimestampAdd : date_delta_to_binary_interval_op (),
1391+ exp .TimestampAdd : _date_delta_to_binary_interval_op (),
12811392 exp .TimestampDiff : lambda self , e : self .func (
12821393 "DATE_DIFF" , exp .Literal .string (e .unit ), e .expression , e .this
12831394 ),
1284- exp .TimestampSub : date_delta_to_binary_interval_op (),
1395+ exp .TimestampSub : _date_delta_to_binary_interval_op (),
12851396 exp .TimeStrToDate : lambda self , e : self .sql (exp .cast (e .this , exp .DataType .Type .DATE )),
12861397 exp .TimeStrToTime : timestrtotime_sql ,
12871398 exp .TimeStrToUnix : lambda self , e : self .func (
@@ -1292,7 +1403,7 @@ class Generator(generator.Generator):
12921403 exp .TimeToUnix : rename_func ("EPOCH" ),
12931404 exp .TsOrDiToDi : lambda self ,
12941405 e : f"CAST(SUBSTR(REPLACE(CAST({ self .sql (e , 'this' )} AS TEXT), '-', ''), 1, 8) AS INT)" ,
1295- exp .TsOrDsAdd : date_delta_to_binary_interval_op (),
1406+ exp .TsOrDsAdd : _date_delta_to_binary_interval_op (),
12961407 exp .TsOrDsDiff : lambda self , e : self .func (
12971408 "DATE_DIFF" ,
12981409 f"'{ e .args .get ('unit' ) or 'DAY' } '" ,
@@ -1323,6 +1434,7 @@ class Generator(generator.Generator):
13231434 exp .JSONObjectAgg : rename_func ("JSON_GROUP_OBJECT" ),
13241435 exp .JSONBObjectAgg : rename_func ("JSON_GROUP_OBJECT" ),
13251436 exp .DateBin : rename_func ("TIME_BUCKET" ),
1437+ exp .LastDay : _last_day_sql ,
13261438 }
13271439
13281440 SUPPORTED_JSON_PATH_PARTS = {
@@ -1459,6 +1571,42 @@ class Generator(generator.Generator):
14591571 exp .NthValue ,
14601572 )
14611573
# Template for ZIPF transpilation - placeholders get replaced with actual parameters.
# Placeholders:
#   :random_expr - expression selected as `r` in the `rand` CTE
#   :s           - exponent used in the weight 1.0 / POWER(i, :s)
#   :n           - upper bound; RANGE(1, :n + 1) enumerates the candidate values i
# The query normalises the running sum of weights into `p` and returns the smallest i
# whose p is >= r.
ZIPF_TEMPLATE: exp.Expression = exp.maybe_parse(
    """
    WITH rand AS (SELECT :random_expr AS r),
    weights AS (
        SELECT i, 1.0 / POWER(i, :s) AS w
        FROM RANGE(1, :n + 1) AS t(i)
    ),
    cdf AS (
        SELECT i, SUM(w) OVER (ORDER BY i) / SUM(w) OVER () AS p
        FROM weights
    )
    SELECT MIN(i)
    FROM cdf
    WHERE p >= (SELECT r FROM rand)
    """
)
1591+
# Template for RANDSTR transpilation - placeholders get replaced with actual parameters.
# Placeholders:
#   :seed   - added to the row index i before hashing
#   :length - RANGE(:length) yields one row (one output character) per index i
# Each row maps (ABS(HASH(i + :seed)) % 1000) / 1000.0 to a 1-based position in
# RANDSTR_CHAR_POOL, and the picked characters are concatenated with LISTAGG.
# NOTE(review): the multiplier 62 presumably equals len(RANDSTR_CHAR_POOL) - confirm.
RANDSTR_TEMPLATE: exp.Expression = exp.maybe_parse(
    f"""
    SELECT LISTAGG(
        SUBSTRING(
            '{RANDSTR_CHAR_POOL}',
            1 + CAST(FLOOR(random_value * 62) AS INT),
            1
        ),
        ''
    )
    FROM (
        SELECT (ABS(HASH(i + :seed)) % 1000) / 1000.0 AS random_value
        FROM RANGE(:length) AS t(i)
    )
    """,
)
1609+
14621610 def bitmapbitposition_sql (self : DuckDB .Generator , expression : exp .BitmapBitPosition ) -> str :
14631611 """
14641612 Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.
@@ -1485,6 +1633,7 @@ def bitmapbitposition_sql(self: DuckDB.Generator, expression: exp.BitmapBitPosit
14851633 def randstr_sql (self : DuckDB .Generator , expression : exp .Randstr ) -> str :
14861634 """
14871635 Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random.
1636+ Uses a pre-parsed template with placeholders replaced by expression nodes.
14881637
14891638 RANDSTR(length, generator) generates a random string of specified length.
14901639 - With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
@@ -1505,27 +1654,35 @@ def randstr_sql(self: DuckDB.Generator, expression: exp.Randstr) -> str:
15051654 # No generator specified, use default seed (arbitrary but deterministic)
15061655 seed_value = exp .Literal .number (RANDSTR_SEED )
15071656
1508- length_sql = self .sql (length )
1509- seed_sql = self .sql (seed_value )
1657+ replacements = {"seed" : seed_value , "length" : length }
1658+ return f"({ self .sql (exp .replace_placeholders (self .RANDSTR_TEMPLATE , ** replacements ))} )"
1659+
def zipf_sql(self: DuckDB.Generator, expression: exp.Zipf) -> str:
    """
    Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling.

    Fills the `s`, `n` and `random_expr` placeholders of the pre-parsed ZIPF_TEMPLATE
    and returns the resulting query wrapped in parentheses. A generator argument that
    is not RANDOM() is hashed into a deterministic value; otherwise RANDOM() is used.
    """
    s = expression.this
    n = expression.args["elementcount"]
    gen = expression.args["gen"]

    if not gen or isinstance(gen, exp.Rand):
        # Use RANDOM() for non-deterministic output
        random_expr: exp.Expression = exp.Rand()
    else:
        # (ABS(HASH(seed)) % 1000000) / 1000000.0
        hashed_seed = exp.Anonymous(this="HASH", expressions=[gen.copy()])
        bucket = exp.Mod(
            this=exp.Abs(this=hashed_seed),
            expression=exp.Literal.number(1000000),
        )
        random_expr = exp.Div(
            this=exp.Paren(this=bucket),
            expression=exp.Literal.number(1000000.0),
        )

    query = exp.replace_placeholders(self.ZIPF_TEMPLATE, s=s, n=n, random_expr=random_expr)
    return f"({self.sql(query)})"
15291686
15301687 def tobinary_sql (self : DuckDB .Generator , expression : exp .ToBinary ) -> str :
15311688 """
0 commit comments