14
14
from janitor .functions .select import get_index_labels
15
15
16
16
17
+ @pf .register_groupby_method
17
18
@pf .register_dataframe_method
18
19
def summarise (
19
- df : pd .DataFrame ,
20
+ df : pd .DataFrame | DataFrameGroupBy ,
20
21
* args : tuple [dict | tuple ],
21
22
by : Any = None ,
22
23
) -> pd .DataFrame :
@@ -107,6 +108,8 @@ def summarise(
107
108
Arguments supported in `pd.DataFrame.groupby`
108
109
can also be passed to `by` via a dictionary.
109
110
111
+ If `df` is a `DataFrameGroupBy` object, `by` is ignored.
112
+
110
113
Examples:
111
114
>>> import pandas as pd
112
115
>>> import janitor
@@ -160,7 +163,7 @@ def summarise(
160
163
103202 4.0
161
164
162
165
Args:
163
- df: A pandas DataFrame.
166
+ df: A pandas DataFrame or DataFrameGroupBy object.
164
167
args: Either a dictionary or a tuple.
165
168
by: Column(s) to group by.
166
169
@@ -171,8 +174,10 @@ def summarise(
171
174
A pandas DataFrame with aggregated columns.
172
175
173
176
""" # noqa: E501
174
-
175
- if by is not None :
177
+ if isinstance (df , DataFrameGroupBy ):
178
+ by = df
179
+ df = df .obj
180
+ elif by is not None :
176
181
# it is assumed that by is created from df
177
182
# onus is on user to ensure that
178
183
if isinstance (by , DataFrameGroupBy ):
@@ -233,7 +238,7 @@ def _aggfunc(arg, df, by):
233
238
val = df
234
239
else :
235
240
val = by
236
- outcome = _process_maybe_callable ( func = arg , obj = val )
241
+ outcome = apply_if_callable ( maybe_callable = arg , obj = val )
237
242
if isinstance (outcome , pd .Series ):
238
243
if not outcome .name :
239
244
raise ValueError ("Ensure the pandas Series object has a name" )
@@ -270,10 +275,11 @@ def _(arg, df, by):
270
275
if len (aggfunc ) != 2 :
271
276
raise ValueError ("the tuple has to be a length of 2" )
272
277
column , func = aggfunc
273
- column_ = _handle_tuple_groupby_selection (by = by , column = column )
274
- column = _apply_func_to_obj (aggfunc = func , obj = val [column_ ])
275
- if isinstance (column , pd .DataFrame ) and column .shape [- 1 ] == 1 :
278
+ column = val .agg ({column : func })
279
+ try :
276
280
column = column .squeeze ()
281
+ except AttributeError :
282
+ pass
277
283
column = _convert_obj_to_named_series (
278
284
obj = column ,
279
285
column_name = column_name ,
@@ -285,54 +291,20 @@ def _(arg, df, by):
285
291
f"instead got { type (column )} "
286
292
)
287
293
else :
288
- column_ = _handle_tuple_groupby_selection (
289
- by = by , column = column_name
290
- )
291
- column = _apply_func_to_obj (aggfunc = aggfunc , obj = val [column_ ])
294
+ column = val .agg ({column_name : aggfunc })
295
+ try :
296
+ column = column .squeeze ()
297
+ except AttributeError :
298
+ pass
292
299
column = _convert_obj_to_named_series (
293
300
obj = column ,
294
301
column_name = column_name ,
295
302
function = aggfunc ,
296
303
)
297
- column = _rename_column_in_by (
298
- column = column , column_name = column_name , by = by
299
- )
300
304
contents .append (column )
301
305
return contents
302
306
303
307
304
- def _process_maybe_callable (func : callable , obj ):
305
- """Function to handle callables"""
306
- try :
307
- column = obj .agg (func )
308
- except : # noqa: E722
309
- column = apply_if_callable (maybe_callable = func , obj = obj )
310
- return column
311
-
312
-
313
- def _process_maybe_string (func : str , obj ):
314
- """Function to handle pandas string functions"""
315
- # treat as a pandas approved string function
316
- # https://pandas.pydata.org/docs/user_guide/groupby.html#built-in-aggregation-methods
317
- return obj .agg (func )
318
-
319
-
320
- def _apply_func_to_obj (aggfunc , obj ):
321
- """Handle str/callables within a dictionary"""
322
- if isinstance (aggfunc , str ):
323
- return _process_maybe_string (func = aggfunc , obj = obj )
324
- return _process_maybe_callable (func = aggfunc , obj = obj )
325
-
326
-
327
- def _handle_tuple_groupby_selection (by : Any , column : Any ):
328
- """
329
- Properly handle a tuple column selection in the presence of a groupby
330
- """
331
- if (by is not None ) and isinstance (column , tuple ):
332
- return [column ]
333
- return column
334
-
335
-
336
308
def _convert_obj_to_named_series (obj , function : Any , column_name : Any ):
337
309
if isinstance (obj , pd .Series ):
338
310
obj .name = column_name
@@ -344,12 +316,3 @@ def _convert_obj_to_named_series(obj, function: Any, column_name: Any):
344
316
else :
345
317
function_name = function .__name__
346
318
return pd .Series (data = obj , index = [function_name ], name = column_name )
347
-
348
-
349
- def _rename_column_in_by (column , column_name , by ):
350
- if by is None :
351
- return column
352
- elif isinstance (column , pd .DataFrame ) and is_scalar (column_name ):
353
- columns = pd .MultiIndex .from_product ([[column_name ], column .columns ])
354
- column .columns = columns
355
- return column
0 commit comments