Skip to content

Commit 0b040c0

Browse files
Make old aggregates use new block method
1 parent bf3c6bb commit 0b040c0

File tree

9 files changed

+85
-113
lines changed

9 files changed

+85
-113
lines changed

bigframes/core/array_value.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,26 @@ def compute_general_reduction(
323323
resulting aggregate columns.
324324
"""
325325
plan = self.node
326+
327+
# short-circuit to keep things simple if all aggs are simple
328+
# TODO: Fully unify paths once rewriters are strong enough to simplify complexity from full path
329+
def _is_direct_agg(agg_expr):
330+
return isinstance(agg_expr, agg_expressions.Aggregation) and all(
331+
isinstance(child, (ex.DerefOp, ex.ScalarConstantExpression))
332+
for child in agg_expr.children
333+
)
334+
335+
if all(_is_direct_agg(agg) for agg in assignments):
336+
agg_defs = tuple((agg, ids.ColumnId.unique()) for agg in assignments)
337+
return ArrayValue(
338+
nodes.AggregateNode(
339+
child=self.node,
340+
aggregations=agg_defs, # type: ignore
341+
by_column_ids=tuple(map(ex.deref, by_column_ids)),
342+
dropna=dropna,
343+
)
344+
)
345+
326346
if dropna:
327347
for col_id in by_column_ids:
328348
plan = nodes.FilterNode(plan, ops.notnull_op.as_expr(col_id))

bigframes/core/block_transforms.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -129,12 +129,12 @@ def quantile(
129129
window_spec=window,
130130
)
131131
quantile_cols.append(quantile_col)
132-
block, _ = block.aggregate(
133-
grouping_column_ids,
132+
block = block.aggregate(
134133
tuple(
135134
agg_expressions.UnaryAggregation(agg_ops.AnyValueOp(), ex.deref(col))
136135
for col in quantile_cols
137136
),
137+
grouping_column_ids,
138138
column_labels=pd.Index(labels),
139139
dropna=dropna,
140140
)
@@ -358,12 +358,12 @@ def value_counts(
358358
if grouping_keys and drop_na:
359359
# only need this if grouping_keys is involved, otherwise the drop_na in the aggregation will handle it for us
360360
block = dropna(block, columns, how="any")
361-
block, agg_ids = block.aggregate(
362-
by_column_ids=(*grouping_keys, *columns),
361+
block = block.aggregate(
363362
aggregations=[agg_expressions.NullaryAggregation(agg_ops.size_op)],
363+
by_column_ids=(*grouping_keys, *columns),
364364
dropna=drop_na and not grouping_keys,
365365
)
366-
count_id = agg_ids[0]
366+
count_id = block.value_columns[0]
367367
if normalize:
368368
unbound_window = windows.unbound(grouping_keys=tuple(grouping_keys))
369369
block, total_count_id = block.apply_window_op(
@@ -641,7 +641,7 @@ def skew(
641641
skew_expr = _skew_from_moments_and_count(count_agg, moment3_agg, variance_agg)
642642
aggregations.append(skew_expr)
643643

644-
block, _ = block.reduce_general(
644+
block = block.aggregate(
645645
aggregations, grouping_column_ids, column_labels=column_labels
646646
)
647647
if not grouping_column_ids:
@@ -674,7 +674,7 @@ def kurt(
674674
kurt_expr = _kurt_from_moments_and_count(count_agg, moment4_agg, variance_agg)
675675
kurt_exprs.append(kurt_expr)
676676

677-
block, _ = block.reduce_general(
677+
block = block.aggregate(
678678
kurt_exprs, grouping_column_ids, column_labels=column_labels
679679
)
680680
if not grouping_column_ids:

bigframes/core/blocks.py

Lines changed: 25 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1169,45 +1169,52 @@ def project_block_exprs(
11691169
index_labels=self._index_labels,
11701170
)
11711171

1172-
def reduce_general(
1172+
def aggregate(
11731173
self,
11741174
aggregations: typing.Sequence[ex.Expression] = (),
11751175
by_column_ids: typing.Sequence[str] = (),
11761176
column_labels: Optional[pd.Index] = None,
11771177
*,
11781178
dropna: bool = True,
1179-
) -> typing.Tuple[Block, typing.Sequence[str]]:
1179+
) -> Block:
11801180
"""
1181-
Version of the aggregate that supports mixing analytic and scalar expressions.
1181+
Apply aggregations to the block.
1182+
1183+
Grouping columns will form the index of the result block.
1184+
1185+
Arguments:
1186+
aggregations: Aggregation expressions to apply
1187+
by_column_ids: column ids of the aggregation keys; these are preserved through the transform and used as index.
1188+
dropna: whether null keys should be dropped
1189+
1190+
Returns:
1191+
Block
11821192
"""
11831193
if column_labels is None:
11841194
column_labels = pd.Index(range(len(aggregations)))
11851195

1186-
result_expr, output_col_ids = self.expr.compute_general_reduction(
1196+
result_expr = self.expr.compute_general_reduction(
11871197
aggregations, by_column_ids, dropna=dropna
11881198
)
11891199

1190-
names: typing.List[Label] = []
1200+
grouping_col_labels: typing.List[Label] = []
11911201
if len(by_column_ids) == 0:
11921202
result_expr, label_id = result_expr.create_constant(0, pd.Int64Dtype())
11931203
index_columns = (label_id,)
1194-
names = [None]
1204+
grouping_col_labels = [None]
11951205
else:
11961206
index_columns = tuple(by_column_ids) # type: ignore
11971207
for by_col_id in by_column_ids:
11981208
if by_col_id in self.value_columns:
1199-
names.append(self.col_id_to_label[by_col_id])
1209+
grouping_col_labels.append(self.col_id_to_label[by_col_id])
12001210
else:
1201-
names.append(self.col_id_to_index_name[by_col_id])
1211+
grouping_col_labels.append(self.col_id_to_index_name[by_col_id])
12021212

1203-
return (
1204-
Block(
1205-
result_expr,
1206-
index_columns=index_columns,
1207-
column_labels=column_labels,
1208-
index_labels=names,
1209-
),
1210-
[id.name for id in output_col_ids],
1213+
return Block(
1214+
result_expr,
1215+
index_columns=index_columns,
1216+
column_labels=column_labels,
1217+
index_labels=grouping_col_labels,
12111218
)
12121219

12131220
def apply_window_op(
@@ -1419,63 +1426,6 @@ def remap_f(x):
14191426
col_labels.append(remap_f(col_label))
14201427
return self.with_column_labels(col_labels)
14211428

1422-
def aggregate(
1423-
self,
1424-
by_column_ids: typing.Sequence[str] = (),
1425-
aggregations: typing.Sequence[agg_expressions.Aggregation] = (),
1426-
column_labels: Optional[pd.Index] = None,
1427-
*,
1428-
dropna: bool = True,
1429-
) -> typing.Tuple[Block, typing.Sequence[str]]:
1430-
"""
1431-
Apply aggregations to the block.
1432-
1433-
Arguments:
1434-
by_column_id: column id of the aggregation key, this is preserved through the transform and used as index.
1435-
aggregations: input_column_id, operation tuples
1436-
dropna: whether null keys should be dropped
1437-
1438-
Returns:
1439-
Tuple[Block, Sequence[str]]:
1440-
The first element is the grouped block. The second is the
1441-
column IDs corresponding to each applied aggregation.
1442-
"""
1443-
if column_labels is None:
1444-
column_labels = pd.Index(range(len(aggregations)))
1445-
1446-
agg_specs = [
1447-
(
1448-
aggregation,
1449-
guid.generate_guid(),
1450-
)
1451-
for aggregation in aggregations
1452-
]
1453-
output_col_ids = [agg_spec[1] for agg_spec in agg_specs]
1454-
result_expr = self.expr.aggregate(agg_specs, by_column_ids, dropna=dropna)
1455-
1456-
names: typing.List[Label] = []
1457-
if len(by_column_ids) == 0:
1458-
result_expr, label_id = result_expr.create_constant(0, pd.Int64Dtype())
1459-
index_columns = (label_id,)
1460-
names = [None]
1461-
else:
1462-
index_columns = tuple(by_column_ids) # type: ignore
1463-
for by_col_id in by_column_ids:
1464-
if by_col_id in self.value_columns:
1465-
names.append(self.col_id_to_label[by_col_id])
1466-
else:
1467-
names.append(self.col_id_to_index_name[by_col_id])
1468-
1469-
return (
1470-
Block(
1471-
result_expr,
1472-
index_columns=index_columns,
1473-
column_labels=column_labels,
1474-
index_labels=names,
1475-
),
1476-
output_col_ids,
1477-
)
1478-
14791429
def get_stat(
14801430
self,
14811431
column_id: str,
@@ -1835,7 +1785,7 @@ def pivot(
18351785
agg_expressions.UnaryAggregation(agg_ops.AnyValueOp(), ex.deref(col_id))
18361786
for col_id in column_ids
18371787
]
1838-
result_block, _ = block.aggregate(
1788+
result_block = block.aggregate(
18391789
by_column_ids=self.index_columns,
18401790
aggregations=aggregations,
18411791
dropna=True,
@@ -2289,7 +2239,7 @@ def _get_unique_values(
22892239
self.select_columns(columns), columns
22902240
)
22912241
else:
2292-
unique_value_block, _ = self.aggregate(by_column_ids=columns, dropna=False)
2242+
unique_value_block = self.aggregate(by_column_ids=columns, dropna=False)
22932243
col_labels = self._get_labels_for_columns(columns)
22942244
unique_value_block = unique_value_block.reset_index(
22952245
drop=False

bigframes/core/groupby/dataframe_group_by.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ def corr(
304304
uniq_orig_columns = utils.combine_indices(labels, pd.Index(range(len(labels))))
305305
result_labels = utils.cross_indices(uniq_orig_columns, uniq_orig_columns)
306306

307-
block, _ = block.aggregate(
307+
block = block.aggregate(
308308
by_column_ids=self._by_col_ids,
309309
aggregations=aggregations,
310310
column_labels=result_labels,
@@ -339,7 +339,7 @@ def cov(
339339
uniq_orig_columns = utils.combine_indices(labels, pd.Index(range(len(labels))))
340340
result_labels = utils.cross_indices(uniq_orig_columns, uniq_orig_columns)
341341

342-
block, _ = block.aggregate(
342+
block = block.aggregate(
343343
by_column_ids=self._by_col_ids,
344344
aggregations=aggregations,
345345
column_labels=result_labels,
@@ -383,9 +383,9 @@ def first(self, numeric_only: bool = False, min_count: int = -1) -> df.DataFrame
383383
agg_ops.FirstNonNullOp(),
384384
window_spec=window_spec,
385385
)
386-
block, _ = block.aggregate(
387-
self._by_col_ids,
388-
tuple(
386+
block = block.aggregate(
387+
by_column_ids=self._by_col_ids,
388+
aggregations=tuple(
389389
aggs.agg(firsts_id, agg_ops.AnyValueOp()) for firsts_id in firsts_ids
390390
),
391391
dropna=self._dropna,
@@ -405,9 +405,11 @@ def last(self, numeric_only: bool = False, min_count: int = -1) -> df.DataFrame:
405405
agg_ops.LastNonNullOp(),
406406
window_spec=window_spec,
407407
)
408-
block, _ = block.aggregate(
409-
self._by_col_ids,
410-
tuple(aggs.agg(lasts_id, agg_ops.AnyValueOp()) for lasts_id in lasts_ids),
408+
block = block.aggregate(
409+
by_column_ids=self._by_col_ids,
410+
aggregations=tuple(
411+
aggs.agg(lasts_id, agg_ops.AnyValueOp()) for lasts_id in lasts_ids
412+
),
411413
dropna=self._dropna,
412414
column_labels=index,
413415
)
@@ -582,7 +584,7 @@ def _agg_func(self, func) -> df.DataFrame:
582584
aggregations = [
583585
aggs.agg(col_id, agg_ops.lookup_agg_func(func)[0]) for col_id in ids
584586
]
585-
agg_block, _ = self._block.aggregate(
587+
agg_block = self._block.aggregate(
586588
by_column_ids=self._by_col_ids,
587589
aggregations=aggregations,
588590
dropna=self._dropna,
@@ -608,7 +610,7 @@ def _agg_dict(self, func: typing.Mapping) -> df.DataFrame:
608610
aggregations.append(aggs.agg(col_id, f_op))
609611
column_labels.append(label)
610612
function_labels.append(f_label)
611-
agg_block, _ = self._block.aggregate(
613+
agg_block = self._block.aggregate(
612614
by_column_ids=self._by_col_ids,
613615
aggregations=aggregations,
614616
dropna=self._dropna,
@@ -646,7 +648,7 @@ def _agg_list(self, func: typing.Sequence) -> df.DataFrame:
646648
(label, agg_ops.lookup_agg_func(f)[1]) for label in labels for f in func
647649
]
648650

649-
agg_block, _ = self._block.aggregate(
651+
agg_block = self._block.aggregate(
650652
by_column_ids=self._by_col_ids,
651653
aggregations=aggregations,
652654
dropna=self._dropna,
@@ -672,7 +674,7 @@ def _agg_named(self, **kwargs) -> df.DataFrame:
672674
col_id = self._resolve_label(v[0])
673675
aggregations.append(aggs.agg(col_id, agg_ops.lookup_agg_func(v[1])[0]))
674676
column_labels.append(k)
675-
agg_block, _ = self._block.aggregate(
677+
agg_block = self._block.aggregate(
676678
by_column_ids=self._by_col_ids,
677679
aggregations=aggregations,
678680
dropna=self._dropna,
@@ -729,7 +731,7 @@ def _aggregate_all(
729731
) -> df.DataFrame:
730732
aggregated_col_ids, labels = self._aggregated_columns(numeric_only=numeric_only)
731733
aggregations = [aggs.agg(col_id, aggregate_op) for col_id in aggregated_col_ids]
732-
result_block, _ = self._block.aggregate(
734+
result_block = self._block.aggregate(
733735
by_column_ids=self._by_col_ids,
734736
aggregations=aggregations,
735737
column_labels=labels,

bigframes/core/groupby/group_by.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def block_groupby_iter(
5555
# are more efficient.
5656
session_aware=False,
5757
)
58-
keys_block, _ = block.aggregate(by_col_ids, dropna=dropna)
58+
keys_block = block.aggregate(by_column_ids=by_col_ids, dropna=dropna)
5959
for chunk in keys_block.to_pandas_batches():
6060
# Convert to MultiIndex to make sure we get tuples,
6161
# even for singular keys.

bigframes/core/groupby/series_group_by.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -222,9 +222,9 @@ def first(self, numeric_only: bool = False, min_count: int = -1) -> series.Serie
222222
agg_ops.FirstNonNullOp(),
223223
window_spec=window_spec,
224224
)
225-
block, _ = block.aggregate(
226-
self._by_col_ids,
225+
block = block.aggregate(
227226
(aggs.agg(firsts_id, agg_ops.AnyValueOp()),),
227+
self._by_col_ids,
228228
dropna=self._dropna,
229229
)
230230
return series.Series(block.with_column_labels([self._value_name]))
@@ -246,9 +246,9 @@ def last(self, numeric_only: bool = False, min_count: int = -1) -> series.Series
246246
agg_ops.LastNonNullOp(),
247247
window_spec=window_spec,
248248
)
249-
block, _ = block.aggregate(
250-
self._by_col_ids,
249+
block = block.aggregate(
251250
(aggs.agg(firsts_id, agg_ops.AnyValueOp()),),
251+
self._by_col_ids,
252252
dropna=self._dropna,
253253
)
254254
return series.Series(block.with_column_labels([self._value_name]))
@@ -270,7 +270,7 @@ def agg(self, func=None) -> typing.Union[df.DataFrame, series.Series]:
270270
]
271271
column_names = [agg_ops.lookup_agg_func(f)[1] for f in func]
272272

273-
agg_block, _ = self._block.aggregate(
273+
agg_block = self._block.aggregate(
274274
by_column_ids=self._by_col_ids,
275275
aggregations=aggregations,
276276
dropna=self._dropna,
@@ -413,9 +413,9 @@ def expanding(self, min_periods: int = 1) -> windows.Window:
413413
)
414414

415415
def _aggregate(self, aggregate_op: agg_ops.UnaryAggregateOp) -> series.Series:
416-
result_block, _ = self._block.aggregate(
417-
self._by_col_ids,
416+
result_block = self._block.aggregate(
418417
(aggs.agg(self._value_column, aggregate_op),),
418+
self._by_col_ids,
419419
dropna=self._dropna,
420420
)
421421

0 commit comments

Comments (0)