Skip to content

Commit 7e1e855

Browse files
Added tests, fixed some bugs and added a release note
1 parent 444de67 commit 7e1e855

File tree

4 files changed

+144
-18
lines changed

4 files changed

+144
-18
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ Other enhancements
6565
- :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`)
6666
- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
6767
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
68+
- :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
6869
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
6970
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
7071
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)

pandas/core/apply.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def map(
120120
For JIT compilers and other engines that need to decorate the
121121
function ``func``, this is the decorator to use. While the
122122
executor may already know which is the decorator to use, this
123-
is useful as for a single executor the user can specify for a
123+
is useful as for a single executor the user can specify for
124124
example ``numba.jit`` or ``numba.njit(nogil=True)``, and this
125125
decorator parameter will contain the exact decorator from the
126126
executor the user wants to use.
@@ -163,7 +163,7 @@ def apply(
163163
For JIT compilers and other engines that need to decorate the
164164
function ``func``, this is the decorator to use. While the
165165
executor may already know which is the decorator to use, this
166-
is useful as for a single executor the user can specify for a
166+
is useful as for a single executor the user can specify for
167167
example ``numba.jit`` or ``numba.njit(nogil=True)``, and this
168168
decorator parameter will contain the exact decorator from the
169169
executor the user wants to use.

pandas/core/frame.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10459,6 +10459,9 @@ def apply(
1045910459
if engine is None:
1046010460
engine = "python"
1046110461

10462+
if engine not in ["python", "numba"]:
10463+
raise ValueError(f"Unknown engine '{engine}'")
10464+
1046210465
op = frame_apply(
1046310466
self,
1046410467
func=func,
@@ -10478,21 +10481,56 @@ def apply(
1047810481
f"{result_type=} only implemented for the default engine"
1047910482
)
1048010483

10484+
agg_axis = self._get_agg_axis(axis)
10485+
10486+
# one axis is empty
10487+
if not all(self.shape):
10488+
try:
10489+
if axis == 0:
10490+
r = func(Series([], dtype=np.float64), *args, **kwargs)
10491+
else:
10492+
r = func(
10493+
Series(index=self.columns, dtype=np.float64),
10494+
*args,
10495+
**kwargs,
10496+
)
10497+
except Exception:
10498+
pass
10499+
else:
10500+
if not isinstance(r, Series):
10501+
if len(agg_axis):
10502+
r = func(Series([], dtype=np.float64), *args, **kwargs)
10503+
else:
10504+
r = np.nan
10505+
10506+
return self._constructor_sliced(r, index=agg_axis)
10507+
return self.copy()
10508+
1048110509
data = self
1048210510
if raw:
1048310511
# This will upcast the whole DataFrame to the same type,
1048410512
# and likely result in an object 2D array.
1048510513
# We should probably pass a list of 1D arrays instead, at
1048610514
# least for ``axis=0``
1048710515
data = data.values
10488-
return engine.__pandas_udf__.apply(
10516+
result = engine.__pandas_udf__.apply(
1048910517
data=data,
1049010518
func=func,
1049110519
args=args,
1049210520
kwargs=kwargs,
1049310521
decorator=engine,
1049410522
axis=axis,
1049510523
)
10524+
if raw:
10525+
if result.ndim == 2:
10526+
return self._constructor(
10527+
result, index=self.index, columns=self.columns
10528+
)
10529+
else:
10530+
return self._constructor_sliced(result, index=agg_axis)
10531+
return result
10532+
else:
10533+
raise ValueError(f"Unknown engine {engine}")
1049610534

1049710535
def map(
1049810536
self, func: PythonFuncType, na_action: Literal["ignore"] | None = None, **kwargs

pandas/tests/apply/test_frame_apply.py

Lines changed: 102 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,63 @@
1717
date_range,
1818
)
1919
import pandas._testing as tm
20+
from pandas.api.executors import BaseExecutionEngine
2021
from pandas.tests.frame.common import zip_frames
2122
from pandas.util.version import Version
2223

2324

25+
class MockExecutionEngine(BaseExecutionEngine):
26+
"""
27+
Execution Engine to test if the execution engine interface receives and
28+
uses all parameters provided by the user.
29+
30+
Making this engine work as the default Python engine by calling it, no extra
31+
functionality is implemented here.
32+
33+
When testing, this will be called when this engine is provided, and then the
34+
same pandas.map and pandas.apply function will be called, but without engine,
35+
executing the default behavior from the python engine.
36+
"""
37+
38+
def map(data, func, args, kwargs, decorator, skip_na):
39+
kwargs_to_pass = kwargs if isinstance(data, DataFrame) else {}
40+
return data.map(
41+
func, action_na="ignore" if skip_na else False, **kwargs_to_pass
42+
)
43+
44+
def apply(data, func, args, kwargs, decorator, axis):
45+
if isinstance(data, Series):
46+
return data.apply(func, convert_dtype=True, args=args, by_row=False)
47+
elif isinstance(data, DataFrame):
48+
return data.apply(
49+
func,
50+
axis=axis,
51+
raw=False,
52+
result_type=None,
53+
args=args,
54+
by_row="compat",
55+
**kwargs,
56+
)
57+
else:
58+
assert isinstance(data, np.ndarray)
59+
60+
def wrap_function(func):
61+
# https://github.com/numpy/numpy/issues/8352
62+
def wrapper(*args, **kwargs):
63+
result = func(*args, **kwargs)
64+
if isinstance(result, str):
65+
result = np.array(result, dtype=object)
66+
return result
67+
68+
return wrapper
69+
70+
return np.apply_along_axis(wrap_function(func), axis, data, *args, **kwargs)
71+
72+
73+
class MockEngineDecorator:
74+
__pandas_udf__ = MockExecutionEngine
75+
76+
2477
@pytest.fixture
2578
def int_frame_const_col():
2679
"""
@@ -35,7 +88,13 @@ def int_frame_const_col():
3588
return df
3689

3790

38-
@pytest.fixture(params=["python", pytest.param("numba", marks=pytest.mark.single_cpu)])
91+
@pytest.fixture(
92+
params=[
93+
"python",
94+
pytest.param("numba", marks=pytest.mark.single_cpu),
95+
MockEngineDecorator,
96+
]
97+
)
3998
def engine(request):
4099
if request.param == "numba":
41100
pytest.importorskip("numba")
@@ -1079,12 +1138,21 @@ def test_result_type_broadcast(int_frame_const_col, request, engine):
10791138
mark = pytest.mark.xfail(reason="numba engine doesn't support list return")
10801139
request.node.add_marker(mark)
10811140
df = int_frame_const_col
1082-
# broadcast result
1083-
result = df.apply(
1084-
lambda x: [1, 2, 3], axis=1, result_type="broadcast", engine=engine
1085-
)
1086-
expected = df.copy()
1087-
tm.assert_frame_equal(result, expected)
1141+
if engine is MockEngineDecorator:
1142+
with pytest.raises(
1143+
NotImplementedError,
1144+
match="result_type='broadcast' only implemented for the default engine",
1145+
):
1146+
df.apply(
1147+
lambda x: [1, 2, 3], axis=1, result_type="broadcast", engine=engine
1148+
)
1149+
else:
1150+
# broadcast result
1151+
result = df.apply(
1152+
lambda x: [1, 2, 3], axis=1, result_type="broadcast", engine=engine
1153+
)
1154+
expected = df.copy()
1155+
tm.assert_frame_equal(result, expected)
10881156

10891157

10901158
def test_result_type_broadcast_series_func(int_frame_const_col, engine, request):
@@ -1097,14 +1165,27 @@ def test_result_type_broadcast_series_func(int_frame_const_col, engine, request)
10971165
request.node.add_marker(mark)
10981166
df = int_frame_const_col
10991167
columns = ["other", "col", "names"]
1100-
result = df.apply(
1101-
lambda x: Series([1, 2, 3], index=columns),
1102-
axis=1,
1103-
result_type="broadcast",
1104-
engine=engine,
1105-
)
1106-
expected = df.copy()
1107-
tm.assert_frame_equal(result, expected)
1168+
1169+
if engine is MockEngineDecorator:
1170+
with pytest.raises(
1171+
NotImplementedError,
1172+
match="result_type='broadcast' only implemented for the default engine",
1173+
):
1174+
df.apply(
1175+
lambda x: Series([1, 2, 3], index=columns),
1176+
axis=1,
1177+
result_type="broadcast",
1178+
engine=engine,
1179+
)
1180+
else:
1181+
result = df.apply(
1182+
lambda x: Series([1, 2, 3], index=columns),
1183+
axis=1,
1184+
result_type="broadcast",
1185+
engine=engine,
1186+
)
1187+
expected = df.copy()
1188+
tm.assert_frame_equal(result, expected)
11081189

11091190

11101191
def test_result_type_series_result(int_frame_const_col, engine, request):
@@ -1791,3 +1872,9 @@ def test_agg_dist_like_and_nonunique_columns():
17911872
result = df.agg({"A": "count"})
17921873
expected = df["A"].count()
17931874
tm.assert_series_equal(result, expected)
1875+
1876+
1877+
@pytest.mark.parametrize("engine_name", ["unknown", 25])
1878+
def test_wrong_engine(engine_name):
1879+
with pytest.raises(ValueError, match="Unknown engine "):
1880+
DataFrame().apply(lambda x: x, engine=engine_name)

0 commit comments

Comments
 (0)