databendlabs
diff --git a/‎src/query/functions/src/scalars/other.rs‎
Lines changed: 51 additions & 0 deletions b/‎src/query/functions/src/scalars/other.rs‎
Lines changed: 51 additions & 0 deletions
diff --git a/‎src/query/functions/tests/it/scalars/testdata/function_list.txt‎
Lines changed: 1 addition & 0 deletions b/‎src/query/functions/tests/it/scalars/testdata/function_list.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/query/service/src/pipelines/pipeline_builder.rs‎
Lines changed: 7 additions & 14 deletions b/‎src/query/service/src/pipelines/pipeline_builder.rs‎
Lines changed: 7 additions & 14 deletions
diff --git a/‎src/query/sql/src/executor/physical_plan.rs‎
Lines changed: 14 additions & 9 deletions b/‎src/query/sql/src/executor/physical_plan.rs‎
Lines changed: 14 additions & 9 deletions
diff --git a/‎src/query/sql/src/executor/physical_plan_builder.rs‎
Lines changed: 1 addition & 2 deletions b/‎src/query/sql/src/executor/physical_plan_builder.rs‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎src/query/sql/src/planner/binder/aggregate.rs‎
Lines changed: 69 additions & 7 deletions b/‎src/query/sql/src/planner/binder/aggregate.rs‎
Lines changed: 69 additions & 7 deletions
diff --git a/‎src/query/sql/src/planner/binder/select.rs‎
Lines changed: 3 additions & 2 deletions b/‎src/query/sql/src/planner/binder/select.rs‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎src/query/sql/src/planner/semantic/grouping_check.rs‎
Lines changed: 9 additions & 1 deletion b/‎src/query/sql/src/planner/semantic/grouping_check.rs‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎src/query/sql/src/planner/semantic/type_check.rs‎
Lines changed: 12 additions & 0 deletions b/‎src/query/sql/src/planner/semantic/type_check.rs‎
Lines changed: 12 additions & 0 deletions
@@ -35,12 +35,16 @@ use common_expression::types::DateType;
 use common_expression::types::GenericType;
 use common_expression::types::NullType;
 use common_expression::types::NullableType;
+use common_expression::types::NumberColumn;
+use common_expression::types::NumberDataType;
+use common_expression::types::NumberScalar;
 use common_expression::types::NumberType;
 use common_expression::types::SimpleDomain;
 use common_expression::types::StringType;
 use common_expression::types::TimestampType;
 use common_expression::types::ValueType;
 use common_expression::vectorize_with_builder_1_arg;
+use common_expression::Column;
 use common_expression::Domain;
 use common_expression::EvalContext;
 use common_expression::Function;
@@ -49,6 +53,7 @@ use common_expression::FunctionProperty;
 use common_expression::FunctionRegistry;
 use common_expression::FunctionSignature;
 use common_expression::Scalar;
+use common_expression::ScalarRef;
 use common_expression::Value;
 use common_expression::ValueRef;
 use ordered_float::OrderedFloat;
@@ -66,6 +71,7 @@ pub fn register(registry: &mut FunctionRegistry) {
     register_inet_aton(registry);
     register_inet_ntoa(registry);
     register_run_diff(registry);
+    register_grouping(registry);
 
     registry.register_passthrough_nullable_1_arg::<Float64Type, StringType, _, _>(
         "humanize_size",
@@ -343,3 +349,48 @@ fn register_run_diff(registry: &mut FunctionRegistry) {
         OrderedFloat(0.0)
     );
 }
+
+/// Registers the `grouping` function factory on `registry`.
+///
+/// The factory only yields a function when exactly one argument type is
+/// given. The factory parameters are captured as the column offsets handed to
+/// `compute_grouping`, and the single `UInt32` argument is used as the
+/// grouping id, for scalar and column inputs alike.
+fn register_grouping(registry: &mut FunctionRegistry) {
+    registry.register_function_factory("grouping", |params, arg_type| {
+        if arg_type.len() != 1 {
+            return None;
+        }
+
+        let signature = FunctionSignature {
+            name: "grouping".to_string(),
+            args_type: vec![DataType::Number(NumberDataType::UInt32)],
+            return_type: DataType::Number(NumberDataType::UInt32),
+            property: FunctionProperty::default(),
+        };
+        // Owned copy so the `move` closure below can keep the offsets.
+        let cols = params.to_vec();
+
+        Some(Arc::new(Function {
+            signature,
+            calc_domain: Box::new(|_| FunctionDomain::Full),
+            eval: Box::new(move |args, _| {
+                let grouping_id = &args[0];
+                match grouping_id {
+                    ValueRef::Scalar(ScalarRef::Number(NumberScalar::UInt32(id))) => {
+                        let grouping = compute_grouping(&cols, *id);
+                        Value::Scalar(Scalar::Number(NumberScalar::UInt32(grouping)))
+                    }
+                    ValueRef::Column(Column::Number(NumberColumn::UInt32(ids))) => {
+                        let groupings: Vec<u32> = ids
+                            .iter()
+                            .map(|id| compute_grouping(&cols, *id))
+                            .collect();
+                        Value::Column(Column::Number(NumberColumn::UInt32(groupings.into())))
+                    }
+                    // Any other argument kind is treated as impossible here.
+                    _ => unreachable!(),
+                }
+            }),
+        }))
+    })
+}
+
+/// Compute the result of `grouping(...)` from `grouping_id` and `cols`.
+///
+/// `cols` are the bit offsets inside `_grouping_id` of the columns passed to
+/// `grouping`, in argument order. The order matters: the last column maps to
+/// the least significant bit of the result, the first one to the most
+/// significant bit.
+///
+/// Both values are 32 bits wide, so offsets `>= 32` read as a zero bit and
+/// only the last 32 entries of `cols` can contribute. This avoids the shift
+/// overflow (a panic in debug builds, a silently wrapped shift in release
+/// builds) that such inputs would otherwise trigger.
+#[inline(always)]
+pub fn compute_grouping(cols: &[usize], grouping_id: u32) -> u32 {
+    const BITS: usize = u32::BITS as usize;
+    cols.iter()
+        .rev()
+        .take(BITS)
+        .enumerate()
+        .filter(|&(_, &offset)| offset < BITS)
+        .fold(0, |grouping, (i, &offset)| {
+            grouping | (((grouping_id >> offset) & 1) << i)
+        })
+}
@@ -1610,6 +1610,7 @@ Functions overloads:
 1 great_circle_angle(Float64 NULL, Float64 NULL, Float64 NULL, Float64 NULL) :: Float32 NULL
 0 great_circle_distance(Float64, Float64, Float64, Float64) :: Float32
 1 great_circle_distance(Float64 NULL, Float64 NULL, Float64 NULL, Float64 NULL) :: Float32 NULL
+0 grouping FACTORY
 0 gt(Variant, Variant) :: Boolean
 1 gt(Variant NULL, Variant NULL) :: Boolean NULL
 2 gt(String, String) :: Boolean
 
@@ -434,19 +434,11 @@ impl PipelineBuilder {
         let group_bys = expand
             .group_bys
             .iter()
-            .filter_map(|i| {
-                // Do not collect virtual column "_grouping_id".
-                if *i != expand.grouping_id_index {
-                    match input_schema.index_of(&i.to_string()) {
-                        Ok(index) => {
-                            let ty = input_schema.field(index).data_type().clone();
-                            Some(Ok((index, ty)))
-                        }
-                        Err(e) => Some(Err(e)),
-                    }
-                } else {
-                    None
-                }
+            .take(expand.group_bys.len() - 1) // The last group-by will be virtual column `_grouping_id`
+            .map(|i| {
+                let index = input_schema.index_of(&i.to_string())?;
+                let ty = input_schema.field(index).data_type();
+                Ok((index, ty.clone()))
             })
             .collect::<Result<Vec<_>>>()?;
         let grouping_sets = expand
@@ -463,6 +455,7 @@ impl PipelineBuilder {
             })
             .collect::<Result<Vec<_>>>()?;
         let mut grouping_ids = Vec::with_capacity(grouping_sets.len());
+        let mask = (1 << group_bys.len()) - 1;
         for set in grouping_sets {
             let mut id = 0;
             for i in set {
@@ -474,7 +467,7 @@ impl PipelineBuilder {
             // group_bys: [a, b]
             // grouping_sets: [[0, 1], [0], [1], []]
             // grouping_ids: 00, 01, 10, 11
-            grouping_ids.push(!id);
+            grouping_ids.push(!id & mask);
         }
 
         self.main_pipeline.add_transform(|input, output| {
 
@@ -174,24 +174,29 @@ pub struct AggregateExpand {
     pub plan_id: u32,
 
     pub input: Box<PhysicalPlan>,
-    pub group_bys: Vec<usize>,
+    pub group_bys: Vec<IndexType>,
     pub grouping_id_index: IndexType,
-    pub grouping_sets: Vec<Vec<usize>>,
+    pub grouping_sets: Vec<Vec<IndexType>>,
     /// Only used for explain
     pub stat_info: Option<PlanStatsInfo>,
 }
 
 impl AggregateExpand {
     pub fn output_schema(&self) -> Result<DataSchemaRef> {
         let input_schema = self.input.output_schema()?;
-        let input_fields = input_schema.fields();
-        let mut output_fields = Vec::with_capacity(input_fields.len() + 1);
-        for field in input_fields {
-            output_fields.push(DataField::new(
-                field.name(),
-                field.data_type().wrap_nullable(),
-            ));
+        let mut output_fields = input_schema.fields().clone();
+
+        for group_by in self
+            .group_bys
+            .iter()
+            .filter(|&index| *index != self.grouping_id_index)
+        {
+            // All group by columns will wrap nullable.
+            let i = input_schema.index_of(&group_by.to_string())?;
+            let f = &mut output_fields[i];
+            *f = DataField::new(f.name(), f.data_type().wrap_nullable())
         }
+
         output_fields.push(DataField::new(
             &self.grouping_id_index.to_string(),
             DataType::Number(NumberDataType::UInt32),
 
@@ -497,8 +497,7 @@ impl PhysicalPlanBuilder {
                                     output_column: v.index,
                                     args: agg.args.iter().map(|arg| {
                                         if let ScalarExpr::BoundColumnRef(col) = arg {
-                                            let col_index = input_schema.index_of(&col.column.index.to_string())?;
-                                            Ok(col_index)
+                                            input_schema.index_of(&col.column.index.to_string())
                                         } else {
                                             Err(ErrorCode::Internal(
                                                 "Aggregate function argument must be a BoundColumnRef".to_string()
 
@@ -80,7 +80,7 @@ pub struct AggregateInfo {
     pub group_items_map: HashMap<String, usize>,
 
     /// Index for virtual column `grouping_id`. It's valid only if `grouping_sets` is not empty.
-    pub grouping_id_index: IndexType,
+    pub grouping_id_column: Option<ColumnBinding>,
     /// Each grouping set is a list of column indices in `group_items`.
     pub grouping_sets: Vec<Vec<IndexType>>,
 }
@@ -124,6 +124,9 @@ impl<'a> AggregateRewriter<'a> {
             }
             .into()),
             ScalarExpr::FunctionCall(func) => {
+                if func.func_name.eq_ignore_ascii_case("grouping") {
+                    return self.replace_grouping(func);
+                }
                 let new_args = func
                     .arguments
                     .iter()
@@ -225,6 +228,46 @@ impl<'a> AggregateRewriter<'a> {
 
         Ok(replaced_agg.into())
     }
+
+    /// Rewrites a `grouping(...)` call into `grouping<params>(_grouping_id)`.
+    ///
+    /// Every argument must be a group-by expression; it is replaced by its
+    /// position in `group_items`, which is passed on as a function param. The
+    /// only remaining argument is a reference to the virtual `_grouping_id`
+    /// column.
+    ///
+    /// # Errors
+    ///
+    /// Returns `SemanticError` when no grouping id column has been bound, and
+    /// `BadArguments` when an argument is not one of the group-by expressions.
+    fn replace_grouping(&mut self, function: &FunctionCall) -> Result<ScalarExpr> {
+        // Only reads are needed, so a shared borrow is enough.
+        let agg_info = &self.bind_context.aggregate_info;
+        let grouping_id_column = agg_info.grouping_id_column.clone().ok_or_else(|| {
+            ErrorCode::SemanticError(
+                "grouping can only be called in GROUP BY GROUPING SETS clauses",
+            )
+        })?;
+
+        // Rewrite the args to params.
+        // The params are the index offset in `grouping_id`.
+        // Here is an example:
+        // If the query is `select grouping(b, a) from t group by grouping sets ((a, b), (a));`
+        // The group-by items are: [a, b].
+        // The group ids will be (a: 0, b: 1):
+        // ba -> 00 -> 0
+        // _a -> 01 -> 1
+        // grouping(b, a) will be rewritten to grouping<1, 0>(grouping_id).
+        let replaced_params = function
+            .arguments
+            .iter()
+            .map(|arg| {
+                // Group items are keyed by the debug rendering of their expression.
+                agg_info
+                    .group_items_map
+                    .get(&format!("{:?}", arg))
+                    .copied()
+                    .ok_or_else(|| {
+                        ErrorCode::BadArguments(
+                            "Arguments of grouping should be group by expressions",
+                        )
+                    })
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        let replaced_func = FunctionCall {
+            func_name: function.func_name.clone(),
+            params: replaced_params,
+            arguments: vec![ScalarExpr::BoundColumnRef(BoundColumnRef {
+                column: grouping_id_column,
+            })],
+        };
+
+        Ok(replaced_func.into())
+    }
 }
 
 impl Binder {
@@ -331,8 +374,12 @@ impl Binder {
             aggregate_functions: bind_context.aggregate_info.aggregate_functions.clone(),
             from_distinct: false,
             limit: None,
-            grouping_id_index: agg_info.grouping_id_index,
             grouping_sets: agg_info.grouping_sets.clone(),
+            grouping_id_index: agg_info
+                .grouping_id_column
+                .as_ref()
+                .map(|g| g.index)
+                .unwrap_or(0),
         };
         new_expr = SExpr::create_unary(aggregate_plan.into(), new_expr);
 
@@ -358,15 +405,16 @@ impl Binder {
             )
             .await?;
         }
+        let agg_info = &mut bind_context.aggregate_info;
         // `grouping_sets` stores formatted `ScalarExpr` for each grouping set.
         let grouping_sets = grouping_sets
             .into_iter()
             .map(|set| {
                 let mut set = set
                     .into_iter()
                     .map(|s| {
-                        let offset = *bind_context.aggregate_info.group_items_map.get(&s).unwrap();
-                        bind_context.aggregate_info.group_items[offset].index
+                        let offset = *agg_info.group_items_map.get(&s).unwrap();
+                        agg_info.group_items[offset].index
                     })
                     .collect::<Vec<_>>();
                 // Grouping sets with the same items should be treated as the same.
@@ -375,7 +423,7 @@ impl Binder {
             })
             .collect::<Vec<_>>();
         let grouping_sets = grouping_sets.into_iter().unique().collect();
-        bind_context.aggregate_info.grouping_sets = grouping_sets;
+        agg_info.grouping_sets = grouping_sets;
         // Add a virtual column `_grouping_id` to group items.
         let grouping_id_column = self.create_column_binding(
             None,
@@ -384,8 +432,17 @@ impl Binder {
             DataType::Number(NumberDataType::UInt32),
         );
         let index = grouping_id_column.index;
-        bind_context.aggregate_info.grouping_id_index = index;
-        bind_context.aggregate_info.group_items.push(ScalarItem {
+        agg_info.grouping_id_column = Some(grouping_id_column.clone());
+        agg_info.group_items_map.insert(
+            format!(
+                "{:?}",
+                ScalarExpr::BoundColumnRef(BoundColumnRef {
+                    column: grouping_id_column.clone()
+                })
+            ),
+            agg_info.group_items.len(),
+        );
+        agg_info.group_items.push(ScalarItem {
             index,
             scalar: ScalarExpr::BoundColumnRef(BoundColumnRef {
                 column: grouping_id_column,
@@ -485,6 +542,11 @@ impl Binder {
             );
         }
 
+        // If it's `GROUP BY GROUPING SETS`, ignore the optimization below.
+        if collect_grouping_sets {
+            return Ok(());
+        }
+
         // Remove dependent group items, group by a, f(a, b), f(a), b ---> group by a,b
         let mut results = vec![];
         for item in bind_context.aggregate_info.group_items.iter() {
 
@@ -101,8 +101,6 @@ impl Binder {
             .normalize_select_list(&mut from_context, &stmt.select_list)
             .await?;
 
-        let (mut scalar_items, projections) = self.analyze_projection(&select_list)?;
-
         // This will potentially add some alias group items to `from_context` if find some.
         if let Some(group_by) = stmt.group_by.as_ref() {
             self.analyze_group_items(&mut from_context, &select_list, group_by)
@@ -111,6 +109,9 @@ impl Binder {
 
         self.analyze_aggregate_select(&mut from_context, &mut select_list)?;
 
+        // `analyze_projection` must run after `analyze_aggregate_select` because `analyze_aggregate_select` rewrites `grouping` calls.
+        let (mut scalar_items, projections) = self.analyze_projection(&select_list)?;
+
         let having = if let Some(having) = &stmt.having {
             Some(
                 self.analyze_aggregate_having(&mut from_context, &select_list, having)
 
@@ -49,7 +49,8 @@ impl<'a> GroupingChecker<'a> {
             .get(&format!("{:?}", scalar))
         {
             let column = &self.bind_context.aggregate_info.group_items[*index];
-            let column_binding = if let ScalarExpr::BoundColumnRef(column_ref) = &column.scalar {
+            let mut column_binding = if let ScalarExpr::BoundColumnRef(column_ref) = &column.scalar
+            {
                 column_ref.column.clone()
             } else {
                 ColumnBinding {
@@ -61,6 +62,13 @@ impl<'a> GroupingChecker<'a> {
                     visibility: Visibility::Visible,
                 }
             };
+
+            if let Some(grouping_id) = &self.bind_context.aggregate_info.grouping_id_column {
+                if grouping_id.index != column_binding.index {
+                    column_binding.data_type = Box::new(column_binding.data_type.wrap_nullable());
+                }
+            }
+
             return Ok(BoundColumnRef {
                 column: column_binding,
             }
 
@@ -991,6 +991,18 @@ impl<'a> TypeChecker<'a> {
             Self::rewrite_substring(&mut args);
         }
 
+        if func_name == "grouping" {
+            // `grouping` will be rewritten again after resolving grouping sets.
+            return Ok(Box::new((
+                ScalarExpr::FunctionCall(FunctionCall {
+                    params: vec![],
+                    arguments: args,
+                    func_name: "grouping".to_string(),
+                }),
+                DataType::Number(NumberDataType::UInt32),
+            )));
+        }
+
         // rewrite_collation
         let func_name = if self.function_need_collation(func_name, &args)?
             && self.ctx.get_settings().get_collation()? == "utf8"