apache
diff --git a/‎datafusion/core/src/physical_planner.rs‎
Lines changed: 7 additions & 3 deletions b/‎datafusion/core/src/physical_planner.rs‎
Lines changed: 7 additions & 3 deletions
diff --git a/‎datafusion/core/tests/execution/coop.rs‎
Lines changed: 5 additions & 3 deletions b/‎datafusion/core/tests/execution/coop.rs‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs‎
Lines changed: 2 additions & 0 deletions b/‎datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎datafusion/physical-plan/src/aggregates/mod.rs‎
Lines changed: 34 additions & 15 deletions b/‎datafusion/physical-plan/src/aggregates/mod.rs‎
Lines changed: 34 additions & 15 deletions
diff --git a/‎datafusion/physical-plan/src/aggregates/no_grouping.rs‎
Lines changed: 20 additions & 0 deletions b/‎datafusion/physical-plan/src/aggregates/no_grouping.rs‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎datafusion/proto/proto/datafusion.proto‎
Lines changed: 1 addition & 0 deletions b/‎datafusion/proto/proto/datafusion.proto‎
Lines changed: 1 addition & 0 deletions
@@ -1582,7 +1582,8 @@ impl DefaultPhysicalPlanner {
             }
         } else if group_expr.is_empty() {
             // No GROUP BY clause - create empty PhysicalGroupBy
-            Ok(PhysicalGroupBy::new(vec![], vec![], vec![]))
+            // no expressions, no null expressions and no grouping expressions
+            Ok(PhysicalGroupBy::new(vec![], vec![], vec![], false))
         } else {
             Ok(PhysicalGroupBy::new_single(
                 group_expr
@@ -1654,6 +1655,7 @@ fn merge_grouping_set_physical_expr(
         grouping_set_expr,
         null_exprs,
         merged_sets,
+        true,
     ))
 }
 
@@ -1696,7 +1698,7 @@ fn create_cube_physical_expr(
         }
     }
 
-    Ok(PhysicalGroupBy::new(all_exprs, null_exprs, groups))
+    Ok(PhysicalGroupBy::new(all_exprs, null_exprs, groups, true))
 }
 
 /// Expand and align a ROLLUP expression. This is a special case of GROUPING SETS
@@ -1741,7 +1743,7 @@ fn create_rollup_physical_expr(
         groups.push(group)
     }
 
-    Ok(PhysicalGroupBy::new(all_exprs, null_exprs, groups))
+    Ok(PhysicalGroupBy::new(all_exprs, null_exprs, groups, true))
 }
 
 /// For a given logical expr, get a properly typed NULL ScalarValue physical expression
@@ -2832,6 +2834,7 @@ mod tests {
                         true,
                     ],
                 ],
+                has_grouping_set: true,
             },
         )
         "#);
@@ -2942,6 +2945,7 @@ mod tests {
                         false,
                     ],
                 ],
+                has_grouping_set: true,
             },
         )
         "#);
 
@@ -170,7 +170,7 @@ async fn agg_no_grouping_yields(
     let inf = Arc::new(make_lazy_exec("value", pretend_infinite));
     let aggr = Arc::new(AggregateExec::try_new(
         AggregateMode::Single,
-        PhysicalGroupBy::new(vec![], vec![], vec![]),
+        PhysicalGroupBy::new(vec![], vec![], vec![], false),
         vec![Arc::new(
             AggregateExprBuilder::new(
                 sum::sum_udaf(),
@@ -204,7 +204,7 @@ async fn agg_grouping_yields(
 
     let aggr = Arc::new(AggregateExec::try_new(
         AggregateMode::Single,
-        PhysicalGroupBy::new(vec![(group, "group".to_string())], vec![], vec![]),
+        PhysicalGroupBy::new(vec![(group, "group".to_string())], vec![], vec![], false),
         vec![Arc::new(
             AggregateExprBuilder::new(sum::sum_udaf(), vec![value_col.clone()])
                 .schema(inf.schema())
@@ -240,6 +240,7 @@ async fn agg_grouped_topk_yields(
                 vec![(group, "group".to_string())],
                 vec![],
                 vec![vec![false]],
+                false,
             ),
             vec![Arc::new(
                 AggregateExprBuilder::new(min_max::max_udaf(), vec![value_col.clone()])
@@ -545,6 +546,7 @@ async fn interleave_then_aggregate_yields(
             vec![], // no GROUP BY columns
             vec![], // no GROUP BY expressions
             vec![], // no GROUP BY physical expressions
+            false,
         ),
         vec![Arc::new(aggregate_expr)],
         vec![None], // no “distinct” flags
@@ -676,7 +678,7 @@ async fn join_agg_yields(
 
     let aggr = Arc::new(AggregateExec::try_new(
         AggregateMode::Single,
-        PhysicalGroupBy::new(vec![], vec![], vec![]),
+        PhysicalGroupBy::new(vec![], vec![], vec![], false),
         vec![Arc::new(aggregate_expr)],
         vec![None],
         projection,
 
@@ -2545,6 +2545,7 @@ fn test_no_pushdown_grouping_sets_filter_on_missing_column() {
             vec![false, false], // (a, b) - both present
             vec![true, false],  // (b) - a is NULL, b present
         ],
+        true,
     );
 
     let aggregate = Arc::new(
@@ -2615,6 +2616,7 @@ fn test_pushdown_grouping_sets_filter_on_common_column() {
             vec![false, false], // (a, b) - both present
             vec![true, false],  // (b) - a is NULL, b present
         ],
+        true,
     );
 
     let aggregate = Arc::new(
 
@@ -177,6 +177,9 @@ pub struct PhysicalGroupBy {
     /// expression in null_expr. If `groups[i][j]` is true, then the
     /// j-th expression in the i-th group is NULL, otherwise it is `expr[j]`.
     groups: Vec<Vec<bool>>,
+    /// True when GROUPING SETS/CUBE/ROLLUP are used so `__grouping_id` should
+    /// be included in the output schema.
+    has_grouping_set: bool,
 }
 
 impl PhysicalGroupBy {
@@ -185,11 +188,13 @@ impl PhysicalGroupBy {
         expr: Vec<(Arc<dyn PhysicalExpr>, String)>,
         null_expr: Vec<(Arc<dyn PhysicalExpr>, String)>,
         groups: Vec<Vec<bool>>,
+        has_grouping_set: bool,
     ) -> Self {
         Self {
             expr,
             null_expr,
             groups,
+            has_grouping_set,
         }
     }
 
@@ -201,6 +206,7 @@ impl PhysicalGroupBy {
             expr,
             null_expr: vec![],
             groups: vec![vec![false; num_exprs]],
+            has_grouping_set: false,
         }
     }
 
@@ -217,6 +223,11 @@ impl PhysicalGroupBy {
         exprs_nullable
     }
 
+    /// Returns true if this has no grouping at all (including no GROUPING SETS)
+    pub fn is_true_no_grouping(&self) -> bool {
+        self.is_empty() && !self.has_grouping_set
+    }
+
     /// Returns the group expressions
     pub fn expr(&self) -> &[(Arc<dyn PhysicalExpr>, String)] {
         &self.expr
@@ -232,14 +243,20 @@ impl PhysicalGroupBy {
         &self.groups
     }
 
+    /// Returns true if this grouping uses GROUPING SETS, CUBE or ROLLUP.
+    pub fn has_grouping_set(&self) -> bool {
+        self.has_grouping_set
+    }
+
     /// Returns true if this `PhysicalGroupBy` has no group expressions
     pub fn is_empty(&self) -> bool {
         self.expr.is_empty()
     }
 
-    /// Check whether grouping set is single group
+    /// Returns true if this is a "simple" GROUP BY (not using GROUPING SETS/CUBE/ROLLUP).
+    /// This determines whether the `__grouping_id` column is included in the output schema.
     pub fn is_single(&self) -> bool {
-        self.null_expr.is_empty()
+        !self.has_grouping_set
     }
 
     /// Calculate GROUP BY expressions according to input schema.
@@ -253,7 +270,7 @@ impl PhysicalGroupBy {
     /// The number of expressions in the output schema.
     fn num_output_exprs(&self) -> usize {
         let mut num_exprs = self.expr.len();
-        if !self.is_single() {
+        if self.has_grouping_set {
             num_exprs += 1
         }
         num_exprs
@@ -270,7 +287,7 @@ impl PhysicalGroupBy {
                 .take(num_output_exprs)
                 .map(|(index, (_, name))| Arc::new(Column::new(name, index)) as _),
         );
-        if !self.is_single() {
+        if self.has_grouping_set {
             output_exprs.push(Arc::new(Column::new(
                 Aggregate::INTERNAL_GROUPING_ID,
                 self.expr.len(),
@@ -281,11 +298,7 @@ impl PhysicalGroupBy {
 
     /// Returns the number expression as grouping keys.
     pub fn num_group_exprs(&self) -> usize {
-        if self.is_single() {
-            self.expr.len()
-        } else {
-            self.expr.len() + 1
-        }
+        self.expr.len() + usize::from(self.has_grouping_set)
     }
 
     pub fn group_schema(&self, schema: &Schema) -> Result<SchemaRef> {
@@ -308,7 +321,7 @@ impl PhysicalGroupBy {
                 .into(),
             );
         }
-        if !self.is_single() {
+        if self.has_grouping_set {
             fields.push(
                 Field::new(
                     Aggregate::INTERNAL_GROUPING_ID,
@@ -344,17 +357,17 @@ impl PhysicalGroupBy {
                 )
                 .collect();
         let num_exprs = expr.len();
-        let groups = if self.expr.is_empty() {
+        let groups = if self.expr.is_empty() && !self.has_grouping_set {
             // No GROUP BY expressions - should have no groups
             vec![]
         } else {
-            // Has GROUP BY expressions - create a single group
             vec![vec![false; num_exprs]]
         };
         Self {
             expr,
             null_expr: vec![],
             groups,
+            has_grouping_set: false,
         }
     }
 }
@@ -374,6 +387,7 @@ impl PartialEq for PhysicalGroupBy {
                 .zip(other.null_expr.iter())
                 .all(|((expr1, name1), (expr2, name2))| expr1.eq(expr2) && name1 == name2)
             && self.groups == other.groups
+            && self.has_grouping_set == other.has_grouping_set
     }
 }
 
@@ -723,8 +737,7 @@ impl AggregateExec {
         partition: usize,
         context: &Arc<TaskContext>,
     ) -> Result<StreamType> {
-        // no group by at all
-        if self.group_by.expr.is_empty() {
+        if self.group_by.is_true_no_grouping() {
             return Ok(StreamType::AggregateStream(AggregateStream::new(
                 self, context, partition,
             )?));
@@ -757,7 +770,7 @@ impl AggregateExec {
     /// on an AggregateExec.
     pub fn is_unordered_unfiltered_group_by_distinct(&self) -> bool {
         // ensure there is a group by
-        if self.group_expr().is_empty() {
+        if self.group_expr().is_empty() && !self.group_expr().has_grouping_set() {
             return false;
         }
         // ensure there are no aggregate expressions
@@ -1954,6 +1967,7 @@ mod tests {
                 vec![true, false],  // (NULL, b)
                 vec![false, false], // (a,b)
             ],
+            true,
         );
 
         let aggregates = vec![Arc::new(
@@ -2103,6 +2117,7 @@ mod tests {
             vec![(col("a", &input_schema)?, "a".to_string())],
             vec![],
             vec![vec![false]],
+            false,
         );
 
         let aggregates: Vec<Arc<AggregateFunctionExpr>> = vec![Arc::new(
@@ -2448,6 +2463,7 @@ mod tests {
             vec![(col("a", &input_schema)?, "a".to_string())],
             vec![],
             vec![vec![false]],
+            false,
         );
 
         // something that allocates within the aggregator
@@ -2892,6 +2908,7 @@ mod tests {
                 vec![true, false, true],
                 vec![true, true, false],
             ],
+            true,
         );
 
         let aggregates: Vec<Arc<AggregateFunctionExpr>> = vec![
@@ -3251,6 +3268,7 @@ mod tests {
                 vec![false, true],  // (a, NULL)
                 vec![false, false], // (a,b)
             ],
+            true,
         );
         let aggr_schema = create_schema(
             &input_schema,
@@ -3302,6 +3320,7 @@ mod tests {
             vec![(col("a", &schema)?, "a".to_string())],
             vec![],
             vec![vec![false]],
+            false,
         );
 
         // Test with MIN for simple intermediate state (min) and AVG for multiple intermediate states (partial sum, partial count).
 
@@ -251,6 +251,23 @@ fn scalar_cmp_null_short_circuit(
     }
 }
 
+/// Prepend the grouping ID column to the output columns if present.
+///
+/// For GROUPING SETS with no GROUP BY expressions, the schema includes a `__grouping_id`
+/// column that must be present in the output. This function inserts it at the beginning
+/// of the columns array to maintain schema alignment.
+fn prepend_grouping_id_column(
+    mut columns: Vec<Arc<dyn arrow::array::Array>>,
+    grouping_id: Option<&ScalarValue>,
+) -> Result<Vec<Arc<dyn arrow::array::Array>>> {
+    if let Some(id) = grouping_id {
+        let num_rows = columns.first().map(|array| array.len()).unwrap_or(1);
+        let grouping_ids = id.to_array_of_size(num_rows)?;
+        columns.insert(0, grouping_ids);
+    }
+    Ok(columns)
+}
+
 impl AggregateStream {
     /// Create a new AggregateStream
     pub fn new(
@@ -350,6 +367,9 @@ impl AggregateStream {
                         let timer = this.baseline_metrics.elapsed_compute().timer();
                         let result =
                             finalize_aggregation(&mut this.accumulators, &this.mode)
+                                .and_then(|columns| {
+                                    prepend_grouping_id_column(columns, None)
+                                })
                                 .and_then(|columns| {
                                     RecordBatch::try_new(
                                         Arc::clone(&this.schema),
 
@@ -1223,6 +1223,7 @@ message AggregateExecNode {
   repeated bool groups = 9;
   repeated MaybeFilter filter_expr = 10;
   AggLimit limit = 11;
+  bool has_grouping_set = 12;
 }
 
 message GlobalLimitExecNode {
Original file line number	Diff line number	Diff line change
`@@ -1582,7 +1582,8 @@ impl DefaultPhysicalPlanner {`
`1582`	`1582`	`}`
`1583`	`1583`	`} else if group_expr.is_empty() {`
`1584`	`1584`	`// No GROUP BY clause - create empty PhysicalGroupBy`
`1585`		`- Ok(PhysicalGroupBy::new(vec![], vec![], vec![]))`
	`1585`	`+ // no expressions, no null expressions and no grouping expressions`
	`1586`	`+ Ok(PhysicalGroupBy::new(vec![], vec![], vec![], false))`
`1586`	`1587`	`} else {`
`1587`	`1588`	`Ok(PhysicalGroupBy::new_single(`
`1588`	`1589`	`group_expr`
`@@ -1654,6 +1655,7 @@ fn merge_grouping_set_physical_expr(`
`1654`	`1655`	`grouping_set_expr,`
`1655`	`1656`	`null_exprs,`
`1656`	`1657`	`merged_sets,`
	`1658`	`+ true,`
`1657`	`1659`	`))`
`1658`	`1660`	`}`
`1659`	`1661`
`@@ -1696,7 +1698,7 @@ fn create_cube_physical_expr(`
`1696`	`1698`	`}`
`1697`	`1699`	`}`
`1698`	`1700`
`1699`		`- Ok(PhysicalGroupBy::new(all_exprs, null_exprs, groups))`
	`1701`	`+ Ok(PhysicalGroupBy::new(all_exprs, null_exprs, groups, true))`
`1700`	`1702`	`}`
`1701`	`1703`
`1702`	`1704`	`/// Expand and align a ROLLUP expression. This is a special case of GROUPING SETS`
`@@ -1741,7 +1743,7 @@ fn create_rollup_physical_expr(`
`1741`	`1743`	`groups.push(group)`
`1742`	`1744`	`}`
`1743`	`1745`
`1744`		`- Ok(PhysicalGroupBy::new(all_exprs, null_exprs, groups))`
	`1746`	`+ Ok(PhysicalGroupBy::new(all_exprs, null_exprs, groups, true))`
`1745`	`1747`	`}`
`1746`	`1748`
`1747`	`1749`	`/// For a given logical expr, get a properly typed NULL ScalarValue physical expression`
`@@ -2832,6 +2834,7 @@ mod tests {`
`2832`	`2834`	`true,`
`2833`	`2835`	`],`
`2834`	`2836`	`],`
	`2837`	`+ has_grouping_set: true,`
`2835`	`2838`	`},`
`2836`	`2839`	`)`
`2837`	`2840`	`"#);`
`@@ -2942,6 +2945,7 @@ mod tests {`
`2942`	`2945`	`false,`
`2943`	`2946`	`],`
`2944`	`2947`	`],`
	`2948`	`+ has_grouping_set: true,`
`2945`	`2949`	`},`
`2946`	`2950`	`)`
`2947`	`2951`	`"#);`
Original file line number	Diff line number	Diff line change
`@@ -1223,6 +1223,7 @@ message AggregateExecNode {`
`1223`	`1223`	`repeated bool groups = 9;`
`1224`	`1224`	`repeated MaybeFilter filter_expr = 10;`
`1225`	`1225`	`AggLimit limit = 11;`
	`1226`	`+ bool has_grouping_set = 12;`
`1226`	`1227`	`}`
`1227`	`1228`
`1228`	`1229`	`message GlobalLimitExecNode {`