Skip to content

Commit bdc2d8e

Browse files
authored
Merge pull request #36 from pyjanitor-devs/samukweku/pandas_groupby_flavor
[ENH] Add groupby accessor and method
2 parents a3bd755 + 8ca5a06 commit bdc2d8e

File tree

5 files changed

+201
-6
lines changed

5 files changed

+201
-6
lines changed

.github/workflows/tests.yml

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,14 @@ jobs:
1818

1919
# See: https://github.com/marketplace/actions/setup-miniconda
2020
- name: Setup miniconda
21-
uses: conda-incubator/setup-miniconda@v2
21+
uses: conda-incubator/setup-miniconda@v3
2222
with:
2323
auto-update-conda: true
24-
miniforge-variant: Mambaforge
25-
channels: conda-forge
26-
activate-environment: pandas-flavor
2724
environment-file: environment.yml
28-
use-mamba: true
25+
miniforge-version: latest
2926

3027
- name: Run unit tests
3128
run: |
32-
conda activate pandas-flavor
3329
python -m pip install -e .
3430
pytest
3531

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ to allow visualization of [pyjanitor](https://github.com/pyjanitor-devs/pyjanito
133133
- **register_dataframe_accessor**: register an accessor (and its methods) with a pandas DataFrame.
134134
- **register_series_method**: register a method directly with a pandas Series.
135135
- **register_series_accessor**: register an accessor (and its methods) with a pandas Series.
136+
- **register_groupby_method**: register a method directly with a pandas DataFrameGroupBy object.
137+
- **register_groupby_accessor**: register an accessor (and its methods) with a pandas DataFrameGroupBy object.
136138

137139
## Installation
138140

pandas_flavor/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
register_dataframe_method,
55
register_series_accessor,
66
register_series_method,
7+
register_groupby_accessor,
8+
register_groupby_method,
79
)
810
from .xarray import (
911
register_xarray_dataarray_method,
@@ -15,6 +17,8 @@
1517
"register_series_accessor",
1618
"register_dataframe_method",
1719
"register_dataframe_accessor",
20+
"register_groupby_accessor",
21+
"register_groupby_method",
1822
"register_xarray_dataarray_method",
1923
"register_xarray_dataset_method",
2024
]

pandas_flavor/register.py

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
"""Register functions as methods of Pandas DataFrame and Series."""
2+
3+
from __future__ import annotations
4+
5+
import warnings
26
from functools import wraps
7+
8+
from pandas.core.groupby.generic import DataFrameGroupBy
9+
from pandas.util._exceptions import find_stack_level
310
from pandas.api.extensions import (
411
register_series_accessor,
512
register_dataframe_accessor,
@@ -228,3 +235,162 @@ def __call__(self, *args, **kwargs):
228235
return method
229236

230237
return inner()
238+
239+
240+
# variant of pandas' accessor
241+
242+
# copied from pandas' accessor file - pandas/pandas/core/accessor.py
243+
"""
244+
245+
accessor.py contains base classes for implementing accessor properties
246+
that can be mixed into or pinned onto other pandas classes.
247+
248+
"""
249+
250+
251+
class CachedAccessor:
    """
    Custom property-like object.

    A descriptor that builds an accessor object on first attribute access
    and caches it on the instance it was accessed from.

    Parameters
    ----------
    name : str
        Namespace that will be accessed under, e.g. ``grouped.foo``.
    accessor : type
        Class with the extension methods.

    Notes
    -----
    The accessor class is instantiated with a single positional argument:
    the object the attribute was looked up on.
    """

    def __init__(self, name: str, accessor: type) -> None:
        self._name = name
        self._accessor = accessor

    def __get__(self, obj, cls):
        if obj is None:
            # Attribute looked up on the class rather than on an instance:
            # hand back the accessor class itself.
            return self._accessor
        accessor_obj = self._accessor(obj)
        # Store the accessor object on the instance under the same name so
        # that later lookups find it in the instance ``__dict__`` and never
        # reach this descriptor again (only ``__get__`` is defined, so the
        # instance attribute takes precedence).
        # ``object.__setattr__`` is used so that any ``__setattr__`` defined
        # by the host class is bypassed.
        object.__setattr__(obj, self._name, accessor_obj)
        return accessor_obj
286+
287+
288+
def _register_accessor(name: str, cls: type):
    """
    Register a custom accessor on a class.

    Args:
        name : str
            Name under which the accessor should be registered.
            A warning is issued
            if this name conflicts with a preexisting attribute.
        cls: type
            The class that receives the accessor attribute.

    Returns:
        A class decorator. It attaches the decorated accessor class to
        ``cls`` under ``name`` and returns the accessor class unchanged.
    """

    def decorator(accessor):
        if hasattr(cls, name):
            warnings.warn(
                f"registration of accessor {accessor!r} under name "
                f"{name!r} for type {cls!r} "
                "is overriding a preexisting "
                "attribute with the same name.",
                UserWarning,
                stacklevel=find_stack_level(),
            )
        setattr(cls, name, CachedAccessor(name, accessor))
        # Record the name on ``cls`` itself. If ``_accessors`` is only
        # inherited, copy it first so that the set shared with the base
        # class (and its other subclasses) is not mutated.
        if "_accessors" not in vars(cls):
            cls._accessors = set(getattr(cls, "_accessors", ()))
        cls._accessors.add(name)
        return accessor

    return decorator
320+
321+
322+
def register_groupby_accessor(name: str):
    """Register a custom accessor on pandas DataFrameGroupBy objects.

    Args:
        name: Name under which the accessor should be registered.
            A warning is issued if this name conflicts with a
            preexisting attribute.

    Returns:
        A class decorator that attaches the decorated accessor class
        to DataFrameGroupBy.
    """
    return _register_accessor(name, DataFrameGroupBy)
324+
325+
326+
def register_groupby_method(method):
    """Register a function as a method attached to the pandas DataFrameGroupBy.

    Example:
        >>> @register_groupby_method  # doctest: +SKIP
        ... def print_column(grp, col):  # doctest: +SKIP
        ...     '''Print the dataframe column given'''  # doctest: +SKIP
        ...     print(grp[col])  # doctest: +SKIP

    !!! info "New in version 0.7.0"

    Args:
        method: Function to be registered as a method
            on the DataFrameGroupBy object.

    Returns:
        callable: The original method.
    """
    # Computed once at registration time and reused on every call.
    signature = inspect.signature(method)

    def inner():
        """Build the accessor class, register it and return the method.

        Returns:
            method: The original method.
        """

        class AccessorMethod:
            """DataFrameGroupBy Accessor method class."""

            # Expose the registered function's docstring on the accessor.
            __doc__ = method.__doc__

            def __init__(self, obj):
                """Store the object the accessor was looked up on.

                Args:
                    obj: The pandas DataFrameGroupBy object.
                """
                self._obj = obj

            @wraps(method)
            def __call__(self, *args, **kwargs):
                """Call the registered method on the stored object.

                Args:
                    *args: The arguments to pass to the registered method.
                    **kwargs: The keyword arguments to pass
                        to the registered method.

                Returns:
                    object: The result of calling of the method.
                """
                if method_call_ctx_factory is not None:
                    # A call-context factory is installed: route the call
                    # through the extension-call handler.
                    return handle_pandas_extension_call(
                        method, signature, self._obj, args, kwargs
                    )

                return method(self._obj, *args, **kwargs)

        attach = register_groupby_accessor(method.__name__)
        attach(AccessorMethod)
        return method

    return inner()

tests/test_pandas_register.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"""Tests for pandas series and dataframe method registration."""
2+
23
import pandas_flavor as pf
34
import pandas as pd
5+
from pandas.core.groupby.generic import DataFrameGroupBy
46

57

68
def test_register_dataframe_method():
@@ -39,3 +41,28 @@ def dummy_func(s: pd.Series) -> pd.Series:
3941

4042
ser = pd.Series()
4143
ser.dummy_func()
44+
45+
46+
def test_register_groupby_method():
    """Test register_groupby_method."""

    @pf.register_groupby_method
    def dummy_func(by: DataFrameGroupBy) -> DataFrameGroupBy:
        """Dummy func.

        Args:
            by: A DataFrameGroupBy object.

        Returns:
            DataFrameGroupBy.
        """
        return by

    df = pd.DataFrame(
        {
            "Animal": ["Falcon"],
            "Max Speed": [380.0],
        }
    )
    by = df.groupby("Animal")
    # The registered function must receive the groupby object it was
    # called on; dummy_func returns its argument, so identity proves it.
    assert by.dummy_func() is by

0 commit comments

Comments
 (0)