Rename is_ordered_set_aggregate to supports_within_group_clause for UDAFs (#18397)

Jefffrey · web-flow · commit 5fa1e7ee9d99 · 2025-11-01T10:27:49.000Z
## Which issue does this PR close?  - Closes #18280 ## Rationale for this change  `AggregateUDFImpl::is_ordered_set_aggregate` is confusingly named as all it does currently is permit usage of `WITHIN GROUP` SQL syntax. I don't think it would have any functionality in the future beyond this. Also makes it easier if in future we decide to implement [hypothetical-set aggregate functions](https://www.postgresql.org/docs/9.4/functions-aggregate.html#FUNCTIONS-HYPOTHETICAL-TABLE) too, since we wouldn't need a `is_hypothetical_set_aggregate` variation either. ## What changes are included in this PR?  Rename `AggregateUDFImpl::is_ordered_set_aggregate` to `AggregateUDFImpl::supports_within_group_clause`. ## Are these changes tested?  Existing tests. ## Are there any user-facing changes?  Yes. Added section to upgrade guide.
diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs
@@ -329,9 +329,9 @@ impl AggregateUDF {
         self.inner.supports_null_handling_clause()
     }
 
-    /// See [`AggregateUDFImpl::is_ordered_set_aggregate`] for more details.
-    pub fn is_ordered_set_aggregate(&self) -> bool {
-        self.inner.is_ordered_set_aggregate()
+    /// See [`AggregateUDFImpl::supports_within_group_clause`] for more details.
+    pub fn supports_within_group_clause(&self) -> bool {
+        self.inner.supports_within_group_clause()
     }
 
     /// Returns the documentation for this Aggregate UDF.
@@ -746,18 +746,25 @@ pub trait AggregateUDFImpl: Debug + DynEq + DynHash + Send + Sync {
         true
     }
 
-    /// If this function is an ordered-set aggregate function, return `true`.
-    /// Otherwise, return `false` (default).
+    /// If this function supports the `WITHIN GROUP (ORDER BY column [ASC|DESC])`
+    /// SQL syntax, return `true`. Otherwise, return `false` (default) which will
+    /// cause an error when parsing SQL where this syntax is detected for this
+    /// function.
+    ///
+    /// This function should return `true` for ordered-set aggregate functions
+    /// only.
+    ///
+    /// # Ordered-set aggregate functions
     ///
     /// Ordered-set aggregate functions allow specifying a sort order that affects
     /// how the function calculates its result, unlike other aggregate functions
-    /// like `SUM` or `COUNT`. For example, `percentile_cont` is an ordered-set
+    /// like `sum` or `count`. For example, `percentile_cont` is an ordered-set
     /// aggregate function that calculates the exact percentile value from a list
     /// of values; the output of calculating the `0.75` percentile depends on if
     /// you're calculating on an ascending or descending list of values.
     ///
-    /// Setting this to return `true` affects only SQL parsing & planning; it allows
-    /// use of the `WITHIN GROUP` clause to specify this order, for example:
+    /// An example of how an ordered-set aggregate function is called with the
+    /// `WITHIN GROUP` SQL syntax:
     ///
     /// ```sql
     /// -- Ascending
@@ -784,15 +791,11 @@ pub trait AggregateUDFImpl: Debug + DynEq + DynHash + Send + Sync {
     /// without the `WITHIN GROUP` clause, though a default of ascending is the
     /// standard practice.
     ///
-    /// Note that setting this to `true` does not guarantee input sort order to
-    /// the aggregate function; it expects the function to handle ordering the
-    /// input values themselves (e.g. `percentile_cont` must buffer and sort
-    /// the values internally). That is, DataFusion does not introduce any kind
-    /// of sort into the plan for these functions.
-    ///
-    /// Setting this to `false` disallows calling this function with the `WITHIN GROUP`
-    /// clause.
-    fn is_ordered_set_aggregate(&self) -> bool {
+    /// Ordered-set aggregate function implementations are responsible for handling
+    /// the input sort order themselves (e.g. `percentile_cont` must buffer and
+    /// sort the values internally). That is, DataFusion does not introduce any
+    /// kind of sort into the plan for these functions with this syntax.
+    fn supports_within_group_clause(&self) -> bool {
         false
     }
 
@@ -843,7 +846,7 @@ pub fn udaf_default_schema_name<F: AggregateUDFImpl + ?Sized>(
 
     // exclude the first function argument(= column) in ordered set aggregate function,
     // because it is duplicated with the WITHIN GROUP clause in schema name.
-    let args = if func.is_ordered_set_aggregate() && !order_by.is_empty() {
+    let args = if func.supports_within_group_clause() && !order_by.is_empty() {
         &args[1..]
     } else {
         &args[..]
@@ -867,7 +870,7 @@ pub fn udaf_default_schema_name<F: AggregateUDFImpl + ?Sized>(
     };
 
     if !order_by.is_empty() {
-        let clause = match func.is_ordered_set_aggregate() {
+        let clause = match func.supports_within_group_clause() {
             true => "WITHIN GROUP",
             false => "ORDER BY",
         };
@@ -1259,8 +1262,8 @@ impl AggregateUDFImpl for AliasedAggregateUDFImpl {
         self.inner.supports_null_handling_clause()
     }
 
-    fn is_ordered_set_aggregate(&self) -> bool {
-        self.inner.is_ordered_set_aggregate()
+    fn supports_within_group_clause(&self) -> bool {
+        self.inner.supports_within_group_clause()
     }
 
     fn set_monotonicity(&self, data_type: &DataType) -> SetMonotonicity {
diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs
@@ -319,7 +319,7 @@ impl AggregateUDFImpl for ApproxPercentileCont {
         false
     }
 
-    fn is_ordered_set_aggregate(&self) -> bool {
+    fn supports_within_group_clause(&self) -> bool {
         true
     }
 
diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs
@@ -262,7 +262,7 @@ impl AggregateUDFImpl for ApproxPercentileContWithWeight {
         false
     }
 
-    fn is_ordered_set_aggregate(&self) -> bool {
+    fn supports_within_group_clause(&self) -> bool {
         true
     }
 
diff --git a/datafusion/functions-aggregate/src/percentile_cont.rs b/datafusion/functions-aggregate/src/percentile_cont.rs
@@ -360,7 +360,7 @@ impl AggregateUDFImpl for PercentileCont {
         false
     }
 
-    fn is_ordered_set_aggregate(&self) -> bool {
+    fn supports_within_group_clause(&self) -> bool {
         true
     }
 
diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs
@@ -467,7 +467,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                 let mut args =
                     self.function_args_to_expr(args, schema, planner_context)?;
 
-                let order_by = if fm.is_ordered_set_aggregate() {
+                let order_by = if fm.supports_within_group_clause() {
                     let within_group = self.order_by_to_sort_expr(
                         within_group,
                         schema,
diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs
@@ -336,7 +336,7 @@ impl Unparser<'_> {
                     None => None,
                 };
                 let within_group: Vec<ast::OrderByExpr> =
-                    if agg.func.is_ordered_set_aggregate() {
+                    if agg.func.supports_within_group_clause() {
                         order_by
                             .iter()
                             .map(|sort_expr| self.sort_to_sql(sort_expr))
diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md
@@ -133,20 +133,16 @@ The `projection` field in `FileScanConfig` has been renamed to `projection_exprs
 
 If you directly access the `projection` field:
 
-```rust
-# /* comment to avoid running
+```rust,ignore
 let config: FileScanConfig = ...;
 let projection = config.projection;
-# */
 ```
 
 You should update to:
 
-```rust
-# /* comment to avoid running
+```rust,ignore
 let config: FileScanConfig = ...;
 let projection_exprs = config.projection_exprs;
-# */
 ```
 
 **Impact on builders:**
@@ -168,12 +164,10 @@ Note: `with_projection()` still works but is deprecated and will be removed in a
 
 You can access column indices from `ProjectionExprs` using its methods if needed:
 
-```rust
-# /* comment to avoid running
+```rust,ignore
 let projection_exprs: ProjectionExprs = ...;
 // Get the column indices if the projection only contains simple column references
 let indices = projection_exprs.column_indices();
-# */
 ```
 
 ### `DESCRIBE query` support
@@ -260,6 +254,13 @@ let full_schema = table_schema.table_schema();          // Complete schema with
 let partition_cols_ref = table_schema.table_partition_cols(); // Just the partition columns
 ```
 
+### `AggregateUDFImpl::is_ordered_set_aggregate` has been renamed to `AggregateUDFImpl::supports_within_group_clause`
+
+This method has been renamed to better reflect the actual impact it has for aggregate UDF implementations.
+The accompanying `AggregateUDF::is_ordered_set_aggregate` has also been renamed to `AggregateUDF::supports_within_group_clause`.
+No functionality has been changed with regards to this method; it still refers only to permitting use of `WITHIN GROUP`
+SQL syntax for the aggregate function.
+
 ## DataFusion `50.0.0`
 
 ### ListingTable automatically detects Hive Partitioned tables

Original file line number	Diff line number	Diff line change
`@@ -319,7 +319,7 @@ impl AggregateUDFImpl for ApproxPercentileCont {`
`319`	`319`	`false`
`320`	`320`	`}`
`321`	`321`
`322`		`- fn is_ordered_set_aggregate(&self) -> bool {`
	`322`	`+ fn supports_within_group_clause(&self) -> bool {`
`323`	`323`	`true`
`324`	`324`	`}`
`325`	`325`
Original file line number	Diff line number	Diff line change
`@@ -262,7 +262,7 @@ impl AggregateUDFImpl for ApproxPercentileContWithWeight {`
`262`	`262`	`false`
`263`	`263`	`}`
`264`	`264`
`265`		`- fn is_ordered_set_aggregate(&self) -> bool {`
	`265`	`+ fn supports_within_group_clause(&self) -> bool {`
`266`	`266`	`true`
`267`	`267`	`}`
`268`	`268`
Original file line number	Diff line number	Diff line change
`@@ -360,7 +360,7 @@ impl AggregateUDFImpl for PercentileCont {`
`360`	`360`	`false`
`361`	`361`	`}`
`362`	`362`
`363`		`- fn is_ordered_set_aggregate(&self) -> bool {`
	`363`	`+ fn supports_within_group_clause(&self) -> bool {`
`364`	`364`	`true`
`365`	`365`	`}`
`366`	`366`