Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 39 additions & 1 deletion sqlglot/dialects/duckdb.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from decimal import Decimal
from itertools import groupby
import re
import typing as t
Expand Down Expand Up @@ -1402,7 +1403,7 @@ def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
if isinstance(this, exp.First):
this = exp.AnyValue(this=this.this)

if not isinstance(this, exp.AnyValue):
if not isinstance(this, (exp.AnyValue, exp.ApproxQuantiles)):
self.unsupported("IGNORE NULLS is not supported for non-window functions.")

return self.sql(this)
Expand Down Expand Up @@ -1594,3 +1595,40 @@ def round_sql(self, expression: exp.Round) -> str:
truncate = None

return self.func(func, this, decimals, truncate)

def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str:
    """
    Generate DuckDB's APPROX_QUANTILE(expr, [q0, ..., qn]) for APPROX_QUANTILES(expr, n).

    BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values
    dividing the input distribution into n equal-sized buckets. Here that is expressed as an
    explicit array of the fractions 0/n, 1/n, ..., n/n.

    Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery
    does not document the specific algorithm used so results may differ. DuckDB does not
    support RESPECT NULLS.

    Args:
        expression: The APPROX_QUANTILES node. Its first argument may be wrapped in a
            DISTINCT node, in which case the bucket count is that node's second expression.

    Returns:
        The generated SQL. When the bucket count is missing, is not an integer literal, or
        is not positive, the problem is reported through `self.unsupported` and the result
        of `self.function_fallback_sql(expression)` is returned instead.
    """
    this = expression.this
    if isinstance(this, exp.Distinct):
        # APPROX_QUANTILES requires 2 args and DISTINCT node grabs both
        if len(this.expressions) < 2:
            self.unsupported("APPROX_QUANTILES requires a bucket count argument")
            return self.function_fallback_sql(expression)
        # Only look at the bucket count for now. It is detached from the DISTINCT node
        # further down, after validation, so that every fallback call below still
        # receives the expression with its complete argument list.
        num_quantiles_expr = this.expressions[1]
    else:
        num_quantiles_expr = expression.expression

    if not isinstance(num_quantiles_expr, exp.Literal) or not num_quantiles_expr.is_int:
        self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
        return self.function_fallback_sql(expression)

    num_quantiles = t.cast(int, num_quantiles_expr.to_py())
    if num_quantiles <= 0:
        self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
        return self.function_fallback_sql(expression)

    if isinstance(this, exp.Distinct):
        # Validation passed: drop the bucket count so DISTINCT wraps only the value
        # expression in the generated call.
        num_quantiles_expr.pop()

    # Decimal division renders fractions such as 1/4 as 0.25 without going through
    # binary floating point.
    quantiles = [
        exp.Literal.number(Decimal(i) / Decimal(num_quantiles))
        for i in range(num_quantiles + 1)
    ]

    return self.sql(
        exp.ApproxQuantile(this=this, quantile=exp.Array(expressions=quantiles))
    )
90 changes: 90 additions & 0 deletions tests/dialects/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -3451,3 +3451,93 @@ def test_round(self):
"duckdb": "SELECT ROUND_EVEN(CAST('2.25' AS DECIMAL), 1) AS value",
},
)

def test_approx_quantiles(self):
    """Run `validate_identity` over each APPROX_QUANTILES spelling listed below."""
    identity_cases = (
        "APPROX_QUANTILES(x, 2)",
        "APPROX_QUANTILES(FALSE OR TRUE, 2)",
        "APPROX_QUANTILES((SELECT 1 AS val), CAST(2.1 AS INT64))",
        "APPROX_QUANTILES(DISTINCT x, 2)",
        "APPROX_QUANTILES(x, 2 RESPECT NULLS)",
        "APPROX_QUANTILES(x, 2 IGNORE NULLS)",
        "APPROX_QUANTILES(DISTINCT x, 2 RESPECT NULLS)",
    )

    # Checked in order; the first failing statement aborts the test, exactly as a
    # straight-line sequence of calls would.
    for statement in identity_cases:
        self.validate_identity(statement)

def test_approx_quantiles_to_duckdb(self):
    """
    Check the DuckDB output generated for APPROX_QUANTILES.

    Covers the expected APPROX_QUANTILE(...) text for several bucket counts and argument
    shapes, the inputs that must raise UnsupportedError when generating with
    `unsupported_level=ErrorLevel.RAISE`, and that IGNORE NULLS generates without any
    generator warning being logged.
    """

    def expect_unsupported(sql):
        # Generating DuckDB SQL for `sql` with unsupported_level=RAISE must raise.
        with self.assertRaises(UnsupportedError):
            self.parse_one(sql).sql("duckdb", unsupported_level=ErrorLevel.RAISE)

    expected_outputs = (
        ("APPROX_QUANTILES(x, 1)", "APPROX_QUANTILE(x, [0, 1])"),
        ("APPROX_QUANTILES(x, 2)", "APPROX_QUANTILE(x, [0, 0.5, 1])"),
        ("APPROX_QUANTILES(x, 4)", "APPROX_QUANTILE(x, [0, 0.25, 0.5, 0.75, 1])"),
        ("APPROX_QUANTILES(DISTINCT x, 2)", "APPROX_QUANTILE(DISTINCT x, [0, 0.5, 1])"),
    )
    for source_sql, duckdb_sql in expected_outputs:
        self.validate_all(source_sql, write={"duckdb": duckdb_sql})

    with self.subTest("APPROX_QUANTILES 100 buckets"):
        generated = self.parse_one("APPROX_QUANTILES(x, 100)").sql("duckdb")
        # A single call carrying one array, rather than one call per quantile.
        self.assertEqual(generated.count("APPROX_QUANTILE("), 1)
        for fraction in ("0.01", "0.99"):
            self.assertIn(fraction, generated)
        self.assertRegex(generated, r"APPROX_QUANTILE\(x, \[.*\]\)")

    value_expressions = ("x + y", "CASE WHEN x > 0 THEN x ELSE 0 END", "ABS(x)")
    for expr in value_expressions:
        with self.subTest(expr=expr):
            self.validate_all(
                f"APPROX_QUANTILES({expr}, 2)",
                write={"duckdb": f"APPROX_QUANTILE({expr}, [0, 0.5, 1])"},
            )

    with self.subTest("non-literal bucket count"):
        expect_unsupported("APPROX_QUANTILES(x, bucket_count)")

    with self.subTest("non-integer bucket count"):
        for value in ("0", "-1", "2.5"):
            with self.subTest(value=value):
                expect_unsupported(f"APPROX_QUANTILES(x, {value})")

    with self.subTest("NULL bucket count"):
        expect_unsupported("APPROX_QUANTILES(x, NULL)")

    with self.subTest("missing bucket count"):
        expect_unsupported("APPROX_QUANTILES(x)")

    with self.subTest("missing bucket count with DISTINCT"):
        expect_unsupported("APPROX_QUANTILES(DISTINCT x)")

    with self.subTest("APPROX_QUANTILES IGNORE NULLS"):
        # No warning: IGNORE NULLS is the default behavior in DuckDB
        from sqlglot.generator import logger as generator_logger

        with mock.patch.object(generator_logger, "warning") as warning_spy:
            self.validate_all(
                "APPROX_QUANTILES(x, 2 IGNORE NULLS)",
                write={"duckdb": "APPROX_QUANTILE(x, [0, 0.5, 1])"},
            )
            warning_spy.assert_not_called()

    with self.subTest("APPROX_QUANTILES RESPECT NULLS"):
        expect_unsupported("APPROX_QUANTILES(x, 2 RESPECT NULLS)")