1
- """Function for mutation of a column or columns."""
1
+ """Alternative function to pd.agg for summarizing data."""
2
2
from typing import Any
3
3
import pandas as pd
4
4
import pandas_flavor as pf
@@ -28,10 +28,6 @@ def summarize(
28
28
for the entire dataframe,
29
29
or a row per group, if `by` is present.
30
30
31
- A nested dictionary can be provided,
32
- for passing new column names.
33
- Have a look at the examples below for usage.
34
-
35
31
If the variable argument is a tuple,
36
32
it has to be of the form `(columns, func, names_glue)`;
37
33
the `names_glue` argument is optional.
@@ -66,7 +62,10 @@ def summarize(
66
62
... 'combine_id': [100200, 100200, 101200, 101200, 102201, 103202],
67
63
... 'category': ['heats', 'heats', 'finals', 'finals', 'heats', 'finals']}
68
64
>>> df = pd.DataFrame(data)
69
- >>> df.summarize({"avg_run":"mean"}, by=['combine_id', 'category'])
65
+ >>> (df
66
+ ... .select_columns('combine_id', 'category', 'avg_run')
67
+ ... .summarize({"avg_run":"mean"}, by=['combine_id', 'category'])
68
+ ... )
70
69
avg_run
71
70
combine_id category
72
71
100200 heats 3.5
@@ -76,10 +75,10 @@ def summarize(
76
75
77
76
Summarize with a new column name:
78
77
79
- >>> df.summarize({"avg_run":{"avg_run_2":"mean"}})
78
+ >>> df.summarize({"avg_run_2":df.avg_run.mean()})
80
79
avg_run_2
81
80
0 2.833333
82
- >>> df.summarize({"avg_run":{"avg_run_2":"mean"}}, by=['combine_id', 'category'])
81
+ >>> df.summarize({"avg_run_2":lambda f: f.avg_run.mean()}, by=['combine_id', 'category'])
83
82
avg_run_2
84
83
combine_id category
85
84
100200 heats 3.5
0 commit comments