Skip to content

Commit 90f264f

Browse files
committed
Remove Numba-specific logic from FrameApply; add Series import to validate_values_for_numba
1 parent 77eb146 commit 90f264f

File tree

1 file changed

+2
-142
lines changed

1 file changed

+2
-142
lines changed

pandas/core/apply.py

Lines changed: 2 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,7 @@ def apply_raw_false(
302302

303303
@staticmethod
304304
def validate_values_for_numba(obj: Series | DataFrame) -> None:
305+
from pandas import Series
305306
if isinstance(obj, Series):
306307
if not is_numeric_dtype(obj.dtype):
307308
raise ValueError(
@@ -1115,32 +1116,6 @@ def result_columns(self) -> Index:
11151116
def series_generator(self) -> Generator[Series]:
11161117
pass
11171118

1118-
@staticmethod
1119-
@functools.cache
1120-
@abc.abstractmethod
1121-
def generate_numba_apply_func(
1122-
func, nogil=True, nopython=True, parallel=False
1123-
) -> Callable[[npt.NDArray, Index, Index], dict[int, Any]]:
1124-
pass
1125-
1126-
@abc.abstractmethod
1127-
def apply_with_numba(self):
1128-
pass
1129-
1130-
def validate_values_for_numba(self) -> None:
1131-
# Validate that all column dtypes are OK
1132-
for colname, dtype in self.obj.dtypes.items():
1133-
if not is_numeric_dtype(dtype):
1134-
raise ValueError(
1135-
f"Column {colname} must have a numeric dtype. "
1136-
f"Found '{dtype}' instead"
1137-
)
1138-
if is_extension_array_dtype(dtype):
1139-
raise ValueError(
1140-
f"Column {colname} is backed by an extension array, "
1141-
f"which is not supported by the numba engine."
1142-
)
1143-
11441119
@abc.abstractmethod
11451120
def wrap_results_for_axis(
11461121
self, results: ResType, res_index: Index
@@ -1327,10 +1302,7 @@ def apply_broadcast(self, target: DataFrame) -> DataFrame:
13271302
return result
13281303

13291304
def apply_standard(self):
1330-
if self.engine == "python":
1331-
results, res_index = self.apply_series_generator()
1332-
else:
1333-
results, res_index = self.apply_series_numba()
1305+
results, res_index = self.apply_series_generator()
13341306

13351307
# wrap results
13361308
return self.wrap_results(results, res_index)
@@ -1352,19 +1324,6 @@ def apply_series_generator(self) -> tuple[ResType, Index]:
13521324

13531325
return results, res_index
13541326

1355-
def apply_series_numba(self):
1356-
if self.engine_kwargs.get("parallel", False):
1357-
raise NotImplementedError(
1358-
"Parallel apply is not supported when raw=False and engine='numba'"
1359-
)
1360-
if not self.obj.index.is_unique or not self.columns.is_unique:
1361-
raise NotImplementedError(
1362-
"The index/columns must be unique when raw=False and engine='numba'"
1363-
)
1364-
self.validate_values_for_numba()
1365-
results = self.apply_with_numba()
1366-
return results, self.result_index
1367-
13681327
def wrap_results(self, results: ResType, res_index: Index) -> DataFrame | Series:
13691328
from pandas import Series
13701329

@@ -1404,54 +1363,6 @@ class FrameRowApply(FrameApply):
14041363
def series_generator(self) -> Generator[Series]:
14051364
return (self.obj._ixs(i, axis=1) for i in range(len(self.columns)))
14061365

1407-
@staticmethod
1408-
@functools.cache
1409-
def generate_numba_apply_func(
1410-
func, nogil=True, nopython=True, parallel=False
1411-
) -> Callable[[npt.NDArray, Index, Index], dict[int, Any]]:
1412-
numba = import_optional_dependency("numba")
1413-
from pandas import Series
1414-
1415-
# Import helper from extensions to cast string object -> np strings
1416-
# Note: This also has the side effect of loading our numba extensions
1417-
from pandas.core._numba.extensions import maybe_cast_str
1418-
1419-
jitted_udf = numba.extending.register_jitable(func)
1420-
1421-
# Currently the parallel argument doesn't get passed through here
1422-
# (it's disabled) since the dicts in numba aren't thread-safe.
1423-
@numba.jit(nogil=nogil, nopython=nopython, parallel=parallel)
1424-
def numba_func(values, col_names, df_index, *args):
1425-
results = {}
1426-
for j in range(values.shape[1]):
1427-
# Create the series
1428-
ser = Series(
1429-
values[:, j], index=df_index, name=maybe_cast_str(col_names[j])
1430-
)
1431-
results[j] = jitted_udf(ser, *args)
1432-
return results
1433-
1434-
return numba_func
1435-
1436-
def apply_with_numba(self) -> dict[int, Any]:
1437-
func = cast(Callable, self.func)
1438-
args, kwargs = prepare_function_arguments(
1439-
func, self.args, self.kwargs, num_required_args=1
1440-
)
1441-
nb_func = self.generate_numba_apply_func(
1442-
func, **get_jit_arguments(self.engine_kwargs)
1443-
)
1444-
from pandas.core._numba.extensions import set_numba_data
1445-
1446-
index = self.obj.index
1447-
columns = self.obj.columns
1448-
1449-
# Convert from numba dict to regular dict
1450-
# Our isinstance checks in the df constructor don't pass for numbas typed dict
1451-
with set_numba_data(index) as index, set_numba_data(columns) as columns:
1452-
res = dict(nb_func(self.values, columns, index, *args))
1453-
return res
1454-
14551366
@property
14561367
def result_index(self) -> Index:
14571368
return self.columns
@@ -1545,57 +1456,6 @@ def series_generator(self) -> Generator[Series]:
15451456
mgr.blocks[0].refs = BlockValuesRefs(mgr.blocks[0])
15461457
yield ser
15471458

1548-
@staticmethod
1549-
@functools.cache
1550-
def generate_numba_apply_func(
1551-
func, nogil=True, nopython=True, parallel=False
1552-
) -> Callable[[npt.NDArray, Index, Index], dict[int, Any]]:
1553-
numba = import_optional_dependency("numba")
1554-
from pandas import Series
1555-
from pandas.core._numba.extensions import maybe_cast_str
1556-
1557-
jitted_udf = numba.extending.register_jitable(func)
1558-
1559-
@numba.jit(nogil=nogil, nopython=nopython, parallel=parallel)
1560-
def numba_func(values, col_names_index, index, *args):
1561-
results = {}
1562-
# Currently the parallel argument doesn't get passed through here
1563-
# (it's disabled) since the dicts in numba aren't thread-safe.
1564-
for i in range(values.shape[0]):
1565-
# Create the series
1566-
# TODO: values corrupted without the copy
1567-
ser = Series(
1568-
values[i].copy(),
1569-
index=col_names_index,
1570-
name=maybe_cast_str(index[i]),
1571-
)
1572-
results[i] = jitted_udf(ser, *args)
1573-
1574-
return results
1575-
1576-
return numba_func
1577-
1578-
def apply_with_numba(self) -> dict[int, Any]:
1579-
func = cast(Callable, self.func)
1580-
args, kwargs = prepare_function_arguments(
1581-
func, self.args, self.kwargs, num_required_args=1
1582-
)
1583-
nb_func = self.generate_numba_apply_func(
1584-
func, **get_jit_arguments(self.engine_kwargs)
1585-
)
1586-
1587-
from pandas.core._numba.extensions import set_numba_data
1588-
1589-
# Convert from numba dict to regular dict
1590-
# Our isinstance checks in the df constructor don't pass for numbas typed dict
1591-
with (
1592-
set_numba_data(self.obj.index) as index,
1593-
set_numba_data(self.columns) as columns,
1594-
):
1595-
res = dict(nb_func(self.values, columns, index, *args))
1596-
1597-
return res
1598-
15991459
@property
16001460
def result_index(self) -> Index:
16011461
return self.index

0 commit comments

Comments
 (0)