Skip to content

Commit 912a29f

Browse files
author
samuel.oranyeli
committed
fix docs
1 parent 0de0eb7 commit 912a29f

File tree

1 file changed

+7
-8
lines changed

1 file changed

+7
-8
lines changed

janitor/functions/summarize.py

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
"""Function for mutation of a column or columns."""
1+
"""Alternative function to pd.agg for summarizing data."""
22
from typing import Any
33
import pandas as pd
44
import pandas_flavor as pf
@@ -28,10 +28,6 @@ def summarize(
2828
for the entire dataframe,
2929
or a row per group, if `by` is present.
3030
31-
A nested dictionary can be provided,
32-
for passing new column names.
33-
Have a look at the examples below for usage.
34-
3531
If the variable argument is a tuple,
3632
it has to be of the form `(columns, func, names_glue)`;
3733
the `names_glue` argument is optional.
@@ -66,7 +62,10 @@ def summarize(
6662
... 'combine_id': [100200, 100200, 101200, 101200, 102201, 103202],
6763
... 'category': ['heats', 'heats', 'finals', 'finals', 'heats', 'finals']}
6864
>>> df = pd.DataFrame(data)
69-
>>> df.summarize({"avg_run":"mean"}, by=['combine_id', 'category'])
65+
>>> (df
66+
... .select_columns('combine_id', 'category', 'avg_run')
67+
... .summarize({"avg_run":"mean"}, by=['combine_id', 'category'])
68+
... )
7069
avg_run
7170
combine_id category
7271
100200 heats 3.5
@@ -76,10 +75,10 @@ def summarize(
7675
7776
Summarize with a new column name:
7877
79-
>>> df.summarize({"avg_run":{"avg_run_2":"mean"}})
78+
>>> df.summarize({"avg_run_2":df.avg_run.mean()})
8079
avg_run_2
8180
0 2.833333
82-
>>> df.summarize({"avg_run":{"avg_run_2":"mean"}}, by=['combine_id', 'category'])
81+
>>> df.summarize({"avg_run_2":lambda f: f.avg_run.mean()}, by=['combine_id', 'category'])
8382
avg_run_2
8483
combine_id category
8584
100200 heats 3.5

0 commit comments

Comments
 (0)