Skip to content

Commit 020fb05

Browse files
committed
Chore: refactor duckdb ZIPF transpilation logic
1 parent 4aea018 commit 020fb05

File tree

1 file changed

+25
-41
lines changed

1 file changed

+25
-41
lines changed

sqlglot/dialects/duckdb.py

Lines changed: 25 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1580,6 +1580,24 @@ class Generator(generator.Generator):
15801580
exp.NthValue,
15811581
)
15821582

1583+
# Template for ZIPF transpilation - the :random_expr, :s and :n placeholders are replaced with expression nodes in zipf_sql
1584+
ZIPF_TEMPLATE: exp.Expression = exp.maybe_parse(
1585+
"""
1586+
WITH rand AS (SELECT :random_expr AS r),
1587+
weights AS (
1588+
SELECT i, 1.0 / POWER(i, :s) AS w
1589+
FROM RANGE(1, :n + 1) AS t(i)
1590+
),
1591+
cdf AS (
1592+
SELECT i, SUM(w) OVER (ORDER BY i) / SUM(w) OVER () AS p
1593+
FROM weights
1594+
)
1595+
SELECT MIN(i)
1596+
FROM cdf
1597+
WHERE p >= (SELECT r FROM rand)
1598+
"""
1599+
)
1600+
15831601
def bitmapbitposition_sql(self: DuckDB.Generator, expression: exp.BitmapBitPosition) -> str:
15841602
"""
15851603
Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.
@@ -1648,40 +1666,18 @@ def randstr_sql(self: DuckDB.Generator, expression: exp.Randstr) -> str:
16481666
)
16491667
return f"({self.sql(query)})"
16501668

1651-
# Template for ZIPF transpilation - placeholders get replaced with actual parameters
1652-
ZIPF_TEMPLATE: t.ClassVar[exp.Expression] = exp.maybe_parse(
1653-
"""
1654-
WITH rand AS (SELECT :random_expr AS r),
1655-
weights AS (
1656-
SELECT i, 1.0 / POWER(i, :s) AS w
1657-
FROM RANGE(1, :n + 1) AS t(i)
1658-
),
1659-
cdf AS (
1660-
SELECT i, SUM(w) OVER (ORDER BY i) / SUM(w) OVER () AS p
1661-
FROM weights
1662-
)
1663-
SELECT MIN(i)
1664-
FROM cdf
1665-
WHERE p >= (SELECT r FROM rand)
1666-
"""
1667-
)
1668-
16691669
def zipf_sql(self: DuckDB.Generator, expression: exp.Zipf) -> str:
16701670
"""
16711671
Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling.
16721672
Uses a pre-parsed template with placeholders replaced by expression nodes.
16731673
"""
16741674
s = expression.this
1675-
n = expression.args.get("elementcount")
1676-
gen = expression.args.get("gen")
1675+
n = expression.args["elementcount"]
1676+
gen = expression.args["gen"]
16771677

1678-
random_expr: exp.Expression
1679-
if isinstance(gen, exp.Rand):
1680-
# Use RANDOM() for non-deterministic output
1681-
random_expr = exp.Rand()
1682-
elif gen:
1678+
if gen and not isinstance(gen, exp.Rand):
16831679
# (ABS(HASH(seed)) % 1000000) / 1000000.0
1684-
random_expr = exp.Div(
1680+
random_expr: exp.Expression = exp.Div(
16851681
this=exp.Paren(
16861682
this=exp.Mod(
16871683
this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen.copy()])),
@@ -1691,23 +1687,11 @@ def zipf_sql(self: DuckDB.Generator, expression: exp.Zipf) -> str:
16911687
expression=exp.Literal.number(1000000.0),
16921688
)
16931689
else:
1690+
# Use RANDOM() for non-deterministic output
16941691
random_expr = exp.Rand()
16951692

1696-
# s, n are required args per Zipf.arg_types
1697-
assert s is not None and n is not None
1698-
replacements: dict[str, exp.Expression] = {
1699-
"s": s,
1700-
"n": n,
1701-
"random_expr": random_expr,
1702-
}
1703-
1704-
def replace_placeholder(node: exp.Expression) -> exp.Expression:
1705-
if isinstance(node, exp.Placeholder) and node.name in replacements:
1706-
return replacements[node.name].copy()
1707-
return node
1708-
1709-
query = self.ZIPF_TEMPLATE.copy().transform(replace_placeholder)
1710-
return f"({self.sql(query)})"
1693+
replacements = {"s": s, "n": n, "random_expr": random_expr}
1694+
return f"({self.sql(exp.replace_placeholders(self.ZIPF_TEMPLATE, **replacements))})"
17111695

17121696
def tobinary_sql(self: DuckDB.Generator, expression: exp.ToBinary) -> str:
17131697
"""

0 commit comments

Comments
 (0)