Skip to content

Commit d0a6cfa

Browse files
authored
Merge branch 'main' into abhishekaslk/character-set-singlestore
2 parents 92c11c2 + 77783da commit d0a6cfa

File tree

13 files changed

+362
-42
lines changed

13 files changed

+362
-42
lines changed

sqlglot/dialects/clickhouse.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,8 @@ class Parser(parser.Parser):
565565
"MEDIAN": lambda self: self._parse_quantile(),
566566
"COLUMNS": lambda self: self._parse_columns(),
567567
"TUPLE": lambda self: exp.Struct.from_arg_list(self._parse_function_args(alias=True)),
568+
"AND": lambda self: exp.and_(*self._parse_function_args(alias=False)),
569+
"OR": lambda self: exp.or_(*self._parse_function_args(alias=False)),
568570
}
569571

570572
FUNCTION_PARSERS.pop("MATCH")

sqlglot/dialects/duckdb.py

Lines changed: 185 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from sqlglot import exp, generator, parser, tokens, transforms
99

1010
from sqlglot.dialects.dialect import (
11+
DATETIME_DELTA,
1112
Dialect,
1213
JSON_EXTRACT_TYPE,
1314
NormalizationStrategy,
@@ -89,6 +90,78 @@
8990
MAX_BIT_POSITION = exp.Literal.number(32768)
9091

9192

93+
def _last_day_sql(self: DuckDB.Generator, expression: exp.LastDay) -> str:
    """
    Generate DuckDB SQL for LAST_DAY with an optional date part.

    The native LAST_DAY function is emitted when no unit is given or the unit is MONTH.
    YEAR, QUARTER and WEEK are rewritten into equivalent expressions. Any other unit is
    reported through `self.unsupported` and rendered with the generic function fallback.
    """
    date_expr = expression.this
    unit = expression.text("unit")
    part = unit.upper() if unit else ""

    # No unit or MONTH: the native LAST_DAY already covers this case
    if part in ("", "MONTH"):
        return self.func("LAST_DAY", date_expr)

    if part == "YEAR":
        # December 31st of the year taken from the input date
        december_31st = exp.func(
            "MAKE_DATE",
            exp.func("EXTRACT", "YEAR", date_expr),
            exp.Literal.number(12),
            exp.Literal.number(31),
        )
        return self.sql(december_31st)

    if part == "QUARTER":
        year_of_date = exp.func("EXTRACT", "YEAR", date_expr)
        quarter_of_date = exp.func("EXTRACT", "QUARTER", date_expr)

        # The quarter (1 to 4) times 3 is the number of the quarter's final month
        closing_month = exp.Mul(this=quarter_of_date, expression=exp.Literal.number(3))
        closing_month_start = exp.func(
            "MAKE_DATE", year_of_date, closing_month, exp.Literal.number(1)
        )

        # LAST_DAY of that month is the last day of the quarter
        return self.sql(exp.func("LAST_DAY", closing_month_start))

    if part == "WEEK":
        # DuckDB DAYOFWEEK: Sunday=0, Monday=1, ..., Saturday=6
        day_of_week = exp.func("EXTRACT", "DAYOFWEEK", date_expr)

        # Offset to the week's last day, assumed to be Sunday (Snowflake): (7 - dayofweek) % 7.
        # The subtraction is parenthesized so it binds before the modulo.
        seven_minus_dow = exp.Paren(
            this=exp.Sub(this=exp.Literal.number(7), expression=day_of_week)
        )
        offset_days = exp.Mod(this=seven_minus_dow, expression=exp.Literal.number(7))

        shifted = exp.Add(
            this=date_expr,
            expression=exp.Interval(this=offset_days, unit=exp.var("DAY")),
        )
        return self.sql(exp.cast(shifted, exp.DataType.Type.DATE))

    self.unsupported(f"Unsupported date part '{unit}' in LAST_DAY function")
    return self.function_fallback_sql(expression)
144+
145+
146+
def _is_nanosecond_unit(unit: t.Optional[exp.Expression]) -> bool:
    """Return True when `unit` is a Var or Literal node named NANOSECOND (case-insensitive)."""
    if not isinstance(unit, (exp.Var, exp.Literal)):
        return False

    return unit.name.upper() == "NANOSECOND"
148+
149+
150+
def _handle_nanosecond_diff(
    self: DuckDB.Generator,
    end_time: exp.Expression,
    start_time: exp.Expression,
) -> str:
    """
    Render a NANOSECOND difference as EPOCH_NS(end) - EPOCH_NS(start).

    Both operands are cast to TIMESTAMP_NS first. This form is used because DATE_DIFF
    doesn't support the NANOSECOND unit.
    """
    end_epoch = exp.func("EPOCH_NS", exp.cast(end_time, exp.DataType.Type.TIMESTAMP_NS))
    start_epoch = exp.func("EPOCH_NS", exp.cast(start_time, exp.DataType.Type.TIMESTAMP_NS))

    difference = exp.Sub(this=end_epoch, expression=start_epoch)
    return self.sql(difference)
163+
164+
92165
def _to_boolean_sql(self: DuckDB.Generator, expression: exp.ToBoolean) -> str:
93166
"""
94167
Transpile TO_BOOLEAN and TRY_TO_BOOLEAN functions from Snowflake to DuckDB equivalent.
@@ -162,6 +235,11 @@ def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
162235

163236
# BigQuery -> DuckDB conversion for the TIME_DIFF function
164237
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
238+
unit = expression.unit
239+
240+
if _is_nanosecond_unit(unit):
241+
return _handle_nanosecond_diff(self, expression.expression, expression.this)
242+
165243
this = exp.cast(expression.this, exp.DataType.Type.TIME)
166244
expr = exp.cast(expression.expression, exp.DataType.Type.TIME)
167245

@@ -170,6 +248,35 @@ def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
170248
return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
171249

172250

251+
def _date_delta_to_binary_interval_op(
    cast: bool = True,
) -> t.Callable[[DuckDB.Generator, DATETIME_DELTA], str]:
    """
    Build the DuckDB generator function for date/time delta expressions.

    NANOSECOND deltas are emitted as integer arithmetic on EPOCH_NS wrapped in
    MAKE_TIMESTAMP_NS, because DuckDB doesn't support INTERVAL ... NANOSECOND. Every other
    unit is delegated to the shared `date_delta_to_binary_interval_op` implementation.

    Args:
        cast: Forwarded unchanged to `date_delta_to_binary_interval_op`.

    Returns:
        A callable taking the generator and a delta expression and returning its SQL.
    """
    base_impl = date_delta_to_binary_interval_op(cast=cast)

    # Expression types whose nanosecond offset must be subtracted rather than added
    subtracting_types = (exp.DateSub, exp.DatetimeSub, exp.TimeSub, exp.TimestampSub)

    def _duckdb_date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
        if not _is_nanosecond_unit(expression.unit):
            return base_impl(self, expression)

        # Use the raw count when the offset arrives wrapped in an INTERVAL node
        interval_value = expression.expression
        if isinstance(interval_value, exp.Interval):
            interval_value = interval_value.this

        epoch_ns = exp.func(
            "EPOCH_NS", exp.cast(expression.this, exp.DataType.Type.TIMESTAMP_NS)
        )

        shifted: exp.Expression
        if isinstance(expression, subtracting_types):
            # Parenthesize compound offsets so `a - (b + c)` is not emitted as `a - b + c`
            if isinstance(interval_value, exp.Binary):
                interval_value = exp.Paren(this=interval_value)
            shifted = exp.Sub(this=epoch_ns, expression=interval_value)
        else:
            shifted = exp.Add(this=epoch_ns, expression=interval_value)

        return self.sql(exp.func("MAKE_TIMESTAMP_NS", shifted))

    return _duckdb_date_delta_sql
278+
279+
173280
@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
174281
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
175282
return self.func("ARRAY_SORT", expression.this)
@@ -386,9 +493,13 @@ def _build_week_trunc_expression(date_expr: exp.Expression, start_dow: int) -> e
386493

387494

388495
def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
496+
unit = expression.unit
497+
498+
if _is_nanosecond_unit(unit):
499+
return _handle_nanosecond_diff(self, expression.this, expression.expression)
500+
389501
this = _implicit_datetime_cast(expression.this)
390502
expr = _implicit_datetime_cast(expression.expression)
391-
unit = expression.args.get("unit")
392503

393504
# DuckDB's WEEK diff does not respect Monday crossing (week boundaries), it checks (end_day - start_day) / 7:
394505
# SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-17' AS DATE)) --> 0 (Monday crossed)
@@ -1200,15 +1311,15 @@ class Generator(generator.Generator):
12001311
),
12011312
exp.DataType: _datatype_sql,
12021313
exp.Date: _date_sql,
1203-
exp.DateAdd: date_delta_to_binary_interval_op(),
1314+
exp.DateAdd: _date_delta_to_binary_interval_op(),
12041315
exp.DateFromParts: rename_func("MAKE_DATE"),
1205-
exp.DateSub: date_delta_to_binary_interval_op(),
1316+
exp.DateSub: _date_delta_to_binary_interval_op(),
12061317
exp.DateDiff: _date_diff_sql,
12071318
exp.DateStrToDate: datestrtodate_sql,
12081319
exp.Datetime: no_datetime_sql,
12091320
exp.DatetimeDiff: _date_diff_sql,
1210-
exp.DatetimeSub: date_delta_to_binary_interval_op(),
1211-
exp.DatetimeAdd: date_delta_to_binary_interval_op(),
1321+
exp.DatetimeSub: _date_delta_to_binary_interval_op(),
1322+
exp.DatetimeAdd: _date_delta_to_binary_interval_op(),
12121323
exp.DateToDi: lambda self,
12131324
e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
12141325
exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
@@ -1272,16 +1383,16 @@ class Generator(generator.Generator):
12721383
),
12731384
exp.Struct: _struct_sql,
12741385
exp.Transform: rename_func("LIST_TRANSFORM"),
1275-
exp.TimeAdd: date_delta_to_binary_interval_op(),
1276-
exp.TimeSub: date_delta_to_binary_interval_op(),
1386+
exp.TimeAdd: _date_delta_to_binary_interval_op(),
1387+
exp.TimeSub: _date_delta_to_binary_interval_op(),
12771388
exp.Time: no_time_sql,
12781389
exp.TimeDiff: _timediff_sql,
12791390
exp.Timestamp: no_timestamp_sql,
1280-
exp.TimestampAdd: date_delta_to_binary_interval_op(),
1391+
exp.TimestampAdd: _date_delta_to_binary_interval_op(),
12811392
exp.TimestampDiff: lambda self, e: self.func(
12821393
"DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
12831394
),
1284-
exp.TimestampSub: date_delta_to_binary_interval_op(),
1395+
exp.TimestampSub: _date_delta_to_binary_interval_op(),
12851396
exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
12861397
exp.TimeStrToTime: timestrtotime_sql,
12871398
exp.TimeStrToUnix: lambda self, e: self.func(
@@ -1292,7 +1403,7 @@ class Generator(generator.Generator):
12921403
exp.TimeToUnix: rename_func("EPOCH"),
12931404
exp.TsOrDiToDi: lambda self,
12941405
e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
1295-
exp.TsOrDsAdd: date_delta_to_binary_interval_op(),
1406+
exp.TsOrDsAdd: _date_delta_to_binary_interval_op(),
12961407
exp.TsOrDsDiff: lambda self, e: self.func(
12971408
"DATE_DIFF",
12981409
f"'{e.args.get('unit') or 'DAY'}'",
@@ -1323,6 +1434,7 @@ class Generator(generator.Generator):
13231434
exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
13241435
exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
13251436
exp.DateBin: rename_func("TIME_BUCKET"),
1437+
exp.LastDay: _last_day_sql,
13261438
}
13271439

13281440
SUPPORTED_JSON_PATH_PARTS = {
@@ -1459,6 +1571,42 @@ class Generator(generator.Generator):
14591571
exp.NthValue,
14601572
)
14611573

1574+
# Template for ZIPF transpilation - placeholders get replaced with actual parameters
1575+
ZIPF_TEMPLATE: exp.Expression = exp.maybe_parse(
1576+
"""
1577+
WITH rand AS (SELECT :random_expr AS r),
1578+
weights AS (
1579+
SELECT i, 1.0 / POWER(i, :s) AS w
1580+
FROM RANGE(1, :n + 1) AS t(i)
1581+
),
1582+
cdf AS (
1583+
SELECT i, SUM(w) OVER (ORDER BY i) / SUM(w) OVER () AS p
1584+
FROM weights
1585+
)
1586+
SELECT MIN(i)
1587+
FROM cdf
1588+
WHERE p >= (SELECT r FROM rand)
1589+
"""
1590+
)
1591+
1592+
# Template for RANDSTR transpilation - placeholders get replaced with actual parameters
1593+
RANDSTR_TEMPLATE: exp.Expression = exp.maybe_parse(
1594+
f"""
1595+
SELECT LISTAGG(
1596+
SUBSTRING(
1597+
'{RANDSTR_CHAR_POOL}',
1598+
1 + CAST(FLOOR(random_value * 62) AS INT),
1599+
1
1600+
),
1601+
''
1602+
)
1603+
FROM (
1604+
SELECT (ABS(HASH(i + :seed)) % 1000) / 1000.0 AS random_value
1605+
FROM RANGE(:length) AS t(i)
1606+
)
1607+
""",
1608+
)
1609+
14621610
def bitmapbitposition_sql(self: DuckDB.Generator, expression: exp.BitmapBitPosition) -> str:
14631611
"""
14641612
Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.
@@ -1485,6 +1633,7 @@ def bitmapbitposition_sql(self: DuckDB.Generator, expression: exp.BitmapBitPosit
14851633
def randstr_sql(self: DuckDB.Generator, expression: exp.Randstr) -> str:
14861634
"""
14871635
Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random.
1636+
Uses a pre-parsed template with placeholders replaced by expression nodes.
14881637
14891638
RANDSTR(length, generator) generates a random string of specified length.
14901639
- With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
@@ -1505,27 +1654,35 @@ def randstr_sql(self: DuckDB.Generator, expression: exp.Randstr) -> str:
15051654
# No generator specified, use default seed (arbitrary but deterministic)
15061655
seed_value = exp.Literal.number(RANDSTR_SEED)
15071656

1508-
length_sql = self.sql(length)
1509-
seed_sql = self.sql(seed_value)
1657+
replacements = {"seed": seed_value, "length": length}
1658+
return f"({self.sql(exp.replace_placeholders(self.RANDSTR_TEMPLATE, **replacements))})"
1659+
1660+
def zipf_sql(self: DuckDB.Generator, expression: exp.Zipf) -> str:
1661+
"""
1662+
Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling.
1663+
Uses a pre-parsed template with placeholders replaced by expression nodes.
1664+
"""
1665+
s = expression.this
1666+
n = expression.args["elementcount"]
1667+
gen = expression.args["gen"]
15101668

1511-
query: exp.Select = exp.maybe_parse(
1512-
f"""
1513-
SELECT LISTAGG(
1514-
SUBSTRING(
1515-
'{RANDSTR_CHAR_POOL}',
1516-
1 + CAST(FLOOR(random_value * 62) AS INT),
1517-
1
1669+
if gen and not isinstance(gen, exp.Rand):
1670+
# (ABS(HASH(seed)) % 1000000) / 1000000.0
1671+
random_expr: exp.Expression = exp.Div(
1672+
this=exp.Paren(
1673+
this=exp.Mod(
1674+
this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen.copy()])),
1675+
expression=exp.Literal.number(1000000),
1676+
)
15181677
),
1519-
''
1520-
)
1521-
FROM (
1522-
SELECT (ABS(HASH(i + {seed_sql})) % 1000) / 1000.0 AS random_value
1523-
FROM RANGE({length_sql}) AS t(i)
1678+
expression=exp.Literal.number(1000000.0),
15241679
)
1525-
""",
1526-
dialect="duckdb",
1527-
)
1528-
return f"({self.sql(query)})"
1680+
else:
1681+
# Use RANDOM() for non-deterministic output
1682+
random_expr = exp.Rand()
1683+
1684+
replacements = {"s": s, "n": n, "random_expr": random_expr}
1685+
return f"({self.sql(exp.replace_placeholders(self.ZIPF_TEMPLATE, **replacements))})"
15291686

15301687
def tobinary_sql(self: DuckDB.Generator, expression: exp.ToBinary) -> str:
15311688
"""

sqlglot/dialects/postgres.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,7 @@ class Tokenizer(tokens.Tokenizer):
374374
"NAME": TokenType.NAME,
375375
"OID": TokenType.OBJECT_IDENTIFIER,
376376
"ONLY": TokenType.ONLY,
377+
"POINT": TokenType.POINT,
377378
"REFRESH": TokenType.COMMAND,
378379
"REINDEX": TokenType.COMMAND,
379380
"RESET": TokenType.COMMAND,
@@ -446,6 +447,11 @@ class Parser(parser.Parser):
446447
"LEVENSHTEIN_LESS_EQUAL": _build_levenshtein_less_equal,
447448
"JSON_OBJECT_AGG": lambda args: exp.JSONObjectAgg(expressions=args),
448449
"JSONB_OBJECT_AGG": exp.JSONBObjectAgg.from_arg_list,
450+
"WIDTH_BUCKET": lambda args: exp.WidthBucket(
451+
this=seq_get(args, 0), threshold=seq_get(args, 1)
452+
)
453+
if len(args) == 2
454+
else exp.WidthBucket.from_arg_list(args),
449455
}
450456

451457
NO_PAREN_FUNCTIONS = {

sqlglot/dialects/singlestore.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ class Tokenizer(MySQL.Tokenizer):
8282
"::$": TokenType.DCOLONDOLLAR,
8383
"::%": TokenType.DCOLONPERCENT,
8484
"::?": TokenType.DCOLONQMARK,
85+
"RECORD": TokenType.STRUCT,
8586
}
8687

8788
class Parser(MySQL.Parser):
@@ -328,6 +329,7 @@ class Generator(MySQL.Generator):
328329
SUPPORTS_UESCAPE = False
329330
NULL_ORDERING_SUPPORTED = True
330331
MATCH_AGAINST_TABLE_PREFIX = "TABLE "
332+
STRUCT_DELIMITER = ("(", ")")
331333

332334
@staticmethod
333335
def _unicode_substitute(m: re.Match[str]) -> str:
@@ -613,7 +615,6 @@ def _unicode_substitute(m: re.Match[str]) -> str:
613615
exp.DataType.Type.SERIAL,
614616
exp.DataType.Type.SMALLSERIAL,
615617
exp.DataType.Type.SMALLMONEY,
616-
exp.DataType.Type.STRUCT,
617618
exp.DataType.Type.SUPER,
618619
exp.DataType.Type.TIMETZ,
619620
exp.DataType.Type.TIMESTAMPNTZ,
@@ -654,6 +655,7 @@ def _unicode_substitute(m: re.Match[str]) -> str:
654655
exp.DataType.Type.LINESTRING: "GEOGRAPHY",
655656
exp.DataType.Type.POLYGON: "GEOGRAPHY",
656657
exp.DataType.Type.MULTIPOLYGON: "GEOGRAPHY",
658+
exp.DataType.Type.STRUCT: "RECORD",
657659
exp.DataType.Type.JSONB: "BSON",
658660
exp.DataType.Type.TIMESTAMP: "TIMESTAMP",
659661
exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP",

sqlglot/dialects/trino.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def _parse_json_query(self) -> exp.JSONExtract:
6969
)
7070

7171
class Generator(Presto.Generator):
72+
EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = True
7273
PROPERTIES_LOCATION = {
7374
**Presto.Generator.PROPERTIES_LOCATION,
7475
exp.LocationProperty: exp.Properties.Location.POST_WITH,

0 commit comments

Comments
 (0)