@@ -1580,6 +1580,24 @@ class Generator(generator.Generator):
15801580 exp .NthValue ,
15811581 )
15821582
1583+ # Template for ZIPF transpilation - placeholders get replaced with actual parameters
1584+ ZIPF_TEMPLATE : exp .Expression = exp .maybe_parse (
1585+ """
1586+ WITH rand AS (SELECT :random_expr AS r),
1587+ weights AS (
1588+ SELECT i, 1.0 / POWER(i, :s) AS w
1589+ FROM RANGE(1, :n + 1) AS t(i)
1590+ ),
1591+ cdf AS (
1592+ SELECT i, SUM(w) OVER (ORDER BY i) / SUM(w) OVER () AS p
1593+ FROM weights
1594+ )
1595+ SELECT MIN(i)
1596+ FROM cdf
1597+ WHERE p >= (SELECT r FROM rand)
1598+ """
1599+ )
1600+
15831601 def bitmapbitposition_sql (self : DuckDB .Generator , expression : exp .BitmapBitPosition ) -> str :
15841602 """
15851603 Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.
@@ -1648,40 +1666,18 @@ def randstr_sql(self: DuckDB.Generator, expression: exp.Randstr) -> str:
16481666 )
16491667 return f"({ self .sql (query )} )"
16501668
1651- # Template for ZIPF transpilation - placeholders get replaced with actual parameters
1652- ZIPF_TEMPLATE : t .ClassVar [exp .Expression ] = exp .maybe_parse (
1653- """
1654- WITH rand AS (SELECT :random_expr AS r),
1655- weights AS (
1656- SELECT i, 1.0 / POWER(i, :s) AS w
1657- FROM RANGE(1, :n + 1) AS t(i)
1658- ),
1659- cdf AS (
1660- SELECT i, SUM(w) OVER (ORDER BY i) / SUM(w) OVER () AS p
1661- FROM weights
1662- )
1663- SELECT MIN(i)
1664- FROM cdf
1665- WHERE p >= (SELECT r FROM rand)
1666- """
1667- )
1668-
16691669 def zipf_sql (self : DuckDB .Generator , expression : exp .Zipf ) -> str :
16701670 """
16711671 Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling.
16721672 Uses a pre-parsed template with placeholders replaced by expression nodes.
16731673 """
16741674 s = expression .this
1675- n = expression .args . get ( "elementcount" )
1676- gen = expression .args . get ( "gen" )
1675+ n = expression .args [ "elementcount" ]
1676+ gen = expression .args [ "gen" ]
16771677
1678- random_expr : exp .Expression
1679- if isinstance (gen , exp .Rand ):
1680- # Use RANDOM() for non-deterministic output
1681- random_expr = exp .Rand ()
1682- elif gen :
1678+ if gen and not isinstance (gen , exp .Rand ):
16831679 # (ABS(HASH(seed)) % 1000000) / 1000000.0
1684- random_expr = exp .Div (
1680+ random_expr : exp . Expression = exp .Div (
16851681 this = exp .Paren (
16861682 this = exp .Mod (
16871683 this = exp .Abs (this = exp .Anonymous (this = "HASH" , expressions = [gen .copy ()])),
@@ -1691,23 +1687,11 @@ def zipf_sql(self: DuckDB.Generator, expression: exp.Zipf) -> str:
16911687 expression = exp .Literal .number (1000000.0 ),
16921688 )
16931689 else :
1690+ # Use RANDOM() for non-deterministic output
16941691 random_expr = exp .Rand ()
16951692
1696- # s, n are required args per Zipf.arg_types
1697- assert s is not None and n is not None
1698- replacements : dict [str , exp .Expression ] = {
1699- "s" : s ,
1700- "n" : n ,
1701- "random_expr" : random_expr ,
1702- }
1703-
1704- def replace_placeholder (node : exp .Expression ) -> exp .Expression :
1705- if isinstance (node , exp .Placeholder ) and node .name in replacements :
1706- return replacements [node .name ].copy ()
1707- return node
1708-
1709- query = self .ZIPF_TEMPLATE .copy ().transform (replace_placeholder )
1710- return f"({ self .sql (query )} )"
1693+ replacements = {"s" : s , "n" : n , "random_expr" : random_expr }
1694+ return f"({ self .sql (exp .replace_placeholders (self .ZIPF_TEMPLATE , ** replacements ))} )"
17111695
17121696 def tobinary_sql (self : DuckDB .Generator , expression : exp .ToBinary ) -> str :
17131697 """
0 commit comments