Skip to content

Commit eaf614d

Browse files
authored
(Re)Support old syntax for approx_percentile_cont and approx_percentile_cont_with_weight (#16999)
* Add sqllogictests * Allow both new and old syntax for approx_percentile_cont and approx_percentile_cont_with_weight * Update docs * Add documentation and more tests
1 parent dd5e092 commit eaf614d

File tree

5 files changed

+99
-7
lines changed

5 files changed

+99
-7
lines changed

datafusion/functions-aggregate/src/approx_percentile_cont.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,24 @@ pub fn approx_percentile_cont(
9191
+-----------------------------------------------------------------------+
9292
| 65.0 |
9393
+-----------------------------------------------------------------------+
94-
```"#,
94+
```
95+
An alternate syntax is also supported:
96+
```sql
97+
> SELECT approx_percentile_cont(column_name, 0.75) FROM table_name;
98+
+-----------------------------------------------+
99+
| approx_percentile_cont(column_name, 0.75) |
100+
+-----------------------------------------------+
101+
| 65.0 |
102+
+-----------------------------------------------+
103+
104+
> SELECT approx_percentile_cont(column_name, 0.75, 100) FROM table_name;
105+
+----------------------------------------------------------+
106+
| approx_percentile_cont(column_name, 0.75, 100) |
107+
+----------------------------------------------------------+
108+
| 65.0 |
109+
+----------------------------------------------------------+
110+
```
111+
"#,
95112
standard_argument(name = "expression",),
96113
argument(
97114
name = "percentile",

datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,16 @@ pub fn approx_percentile_cont_with_weight(
8585
+--------------------------------------------------------------------------------------------------+
8686
| 78.5 |
8787
+--------------------------------------------------------------------------------------------------+
88+
```
89+
An alternative syntax is also supported:
90+
91+
```sql
92+
> SELECT approx_percentile_cont_with_weight(column_name, weight_column, 0.90) FROM table_name;
93+
+----------------------------------------------------------------------+
94+
| approx_percentile_cont_with_weight(column_name, weight_column, 0.90) |
95+
+----------------------------------------------------------------------+
96+
| 78.5                                                                 |
97+
+----------------------------------------------------------------------+
8898
```"#,
8999
standard_argument(name = "expression", prefix = "The"),
90100
argument(

datafusion/sql/src/expr/function.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -392,10 +392,6 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
392392
} else {
393393
// User defined aggregate functions (UDAF) have precedence in case it has the same name as a scalar built-in function
394394
if let Some(fm) = self.context_provider.get_aggregate_meta(&name) {
395-
if fm.is_ordered_set_aggregate() && within_group.is_empty() {
396-
return plan_err!("WITHIN GROUP clause is required when calling ordered set aggregate function({})", fm.name());
397-
}
398-
399395
if null_treatment.is_some() && !fm.supports_null_handling_clause() {
400396
return plan_err!(
401397
"[IGNORE | RESPECT] NULLS are not permitted for {}",
@@ -415,7 +411,8 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
415411
None,
416412
)?;
417413

418-
// add target column expression in within group clause to function arguments
414+
// Add the WITHIN GROUP ordering expressions to the front of the argument list
415+
// So function(arg) WITHIN GROUP (ORDER BY x) becomes function(x, arg)
419416
if !within_group.is_empty() {
420417
args = within_group
421418
.iter()

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1337,7 +1337,7 @@ SELECT approx_distinct(c9) AS a, approx_distinct(c9) AS b FROM aggregate_test_10
13371337
## Column `c12` is omitted due to a large relative error (~10%) due to the small
13381338
## float values.
13391339

1340-
#csv_query_approx_percentile_cont (c2)
1340+
# csv_query_approx_percentile_cont (c2)
13411341
query B
13421342
SELECT (ABS(1 - CAST(approx_percentile_cont(0.1) WITHIN GROUP (ORDER BY c2) AS DOUBLE) / 1.0) < 0.05) AS q FROM aggregate_test_100
13431343
----
@@ -1353,6 +1353,23 @@ SELECT (ABS(1 - CAST(approx_percentile_cont(0.9) WITHIN GROUP (ORDER BY c2) AS D
13531353
----
13541354
true
13551355

1356+
1357+
# csv_query_approx_percentile_cont (c2, alternate syntax, should be the same as above)
1358+
query B
1359+
SELECT (ABS(1 - CAST(approx_percentile_cont(c2, 0.1) AS DOUBLE) / 1.0) < 0.05) AS q FROM aggregate_test_100
1360+
----
1361+
true
1362+
1363+
query B
1364+
SELECT (ABS(1 - CAST(approx_percentile_cont(c2, 0.5) AS DOUBLE) / 3.0) < 0.05) AS q FROM aggregate_test_100
1365+
----
1366+
true
1367+
1368+
query B
1369+
SELECT (ABS(1 - CAST(approx_percentile_cont(c2, 0.9) AS DOUBLE) / 5.0) < 0.05) AS q FROM aggregate_test_100
1370+
----
1371+
true
1372+
13561373
# csv_query_approx_percentile_cont (c3)
13571374
query B
13581375
SELECT (ABS(1 - CAST(approx_percentile_cont(0.1) WITHIN GROUP (ORDER BY c3) AS DOUBLE) / -95.3) < 0.05) AS q FROM aggregate_test_100
@@ -1793,6 +1810,17 @@ c 122
17931810
d 124
17941811
e 115
17951812

1813+
1814+
# csv_query_approx_percentile_cont alternate syntax (should be the same as above)
1815+
query TI
1816+
SELECT c1, approx_percentile_cont(c3, 0.95) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1
1817+
----
1818+
a 73
1819+
b 68
1820+
c 122
1821+
d 124
1822+
e 115
1823+
17961824
query TI
17971825
SELECT c1, approx_percentile_cont(0.95) WITHIN GROUP (ORDER BY c3 DESC) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1
17981826
----
@@ -1812,6 +1840,17 @@ c 122
18121840
d 124
18131841
e 115
18141842

1843+
# csv_query_approx_percentile_cont_with_weight alternate syntax
1844+
query TI
1845+
SELECT c1, approx_percentile_cont_with_weight(c3, 1, 0.95) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1
1846+
----
1847+
a 73
1848+
b 68
1849+
c 122
1850+
d 124
1851+
e 115
1852+
1853+
18151854
query TI
18161855
SELECT c1, approx_percentile_cont_with_weight(1, 0.95) WITHIN GROUP (ORDER BY c3 DESC) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1
18171856
----

docs/source/user-guide/sql/aggregate_functions.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,6 +1065,24 @@ approx_percentile_cont(percentile [, centroids]) WITHIN GROUP (ORDER BY expressi
10651065
+-----------------------------------------------------------------------+
10661066
```
10671067

1068+
An alternate syntax is also supported:
1069+
1070+
```sql
1071+
> SELECT approx_percentile_cont(column_name, 0.75) FROM table_name;
1072+
+-----------------------------------------------+
1073+
| approx_percentile_cont(column_name, 0.75) |
1074+
+-----------------------------------------------+
1075+
| 65.0 |
1076+
+-----------------------------------------------+
1077+
1078+
> SELECT approx_percentile_cont(column_name, 0.75, 100) FROM table_name;
1079+
+----------------------------------------------------------+
1080+
| approx_percentile_cont(column_name, 0.75, 100) |
1081+
+----------------------------------------------------------+
1082+
| 65.0 |
1083+
+----------------------------------------------------------+
1084+
```
1085+
10681086
### `approx_percentile_cont_with_weight`
10691087

10701088
Returns the weighted approximate percentile of input values using the t-digest algorithm.
@@ -1096,3 +1114,14 @@ approx_percentile_cont_with_weight(weight, percentile [, centroids]) WITHIN GROU
10961114
| 78.5 |
10971115
+--------------------------------------------------------------------------------------------------+
10981116
```
1117+
1118+
An alternative syntax is also supported:
1119+
1120+
```sql
1121+
> SELECT approx_percentile_cont_with_weight(column_name, weight_column, 0.90) FROM table_name;
1122+
+----------------------------------------------------------------------+
1123+
| approx_percentile_cont_with_weight(column_name, weight_column, 0.90) |
1124+
+----------------------------------------------------------------------+
1125+
| 78.5                                                                 |
1126+
+----------------------------------------------------------------------+
1127+
```

0 commit comments

Comments
 (0)