Commit f330f1a

Merge branch 'main' into daydst2
2 parents cc476fb + 566e592

File tree

19 files changed (+639, -275 lines)

ci/code_checks.sh

Lines changed: 0 additions & 2 deletions
@@ -84,7 +84,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.MultiIndex.get_loc_level PR07" \
         -i "pandas.MultiIndex.levshape SA01" \
         -i "pandas.MultiIndex.names SA01" \
-        -i "pandas.MultiIndex.nlevels SA01" \
         -i "pandas.MultiIndex.remove_unused_levels RT03,SA01" \
         -i "pandas.MultiIndex.reorder_levels RT03,SA01" \
         -i "pandas.MultiIndex.set_levels RT03,SA01" \
@@ -465,7 +464,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.io.stata.StataReader.variable_labels RT03,SA01" \
         -i "pandas.io.stata.StataWriter.write_file SA01" \
         -i "pandas.json_normalize RT03,SA01" \
-        -i "pandas.merge PR07" \
         -i "pandas.merge_asof PR07,RT03" \
         -i "pandas.period_range RT03,SA01" \
         -i "pandas.plotting.andrews_curves RT03,SA01" \

doc/data/titanic.csv

Lines changed: 222 additions & 222 deletions
Large diffs are not rendered by default. (53.1 KB)

doc/source/getting_started/intro_tutorials/01_table_oriented.rst

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@ I want to store passenger data of the Titanic. For a number of passengers, I kno
             "Name": [
                 "Braund, Mr. Owen Harris",
                 "Allen, Mr. William Henry",
-                "Bonnell, Miss. Elizabeth",
+                "Bonnell, Miss Elizabeth",
             ],
             "Age": [22, 35, 58],
             "Sex": ["male", "male", "female"],

doc/source/user_guide/missing_data.rst

Lines changed: 1 addition & 1 deletion
@@ -353,7 +353,7 @@ this behaviour and include NA values in the calculation, use ``skipna=False``.
 Dropping missing data
 ~~~~~~~~~~~~~~~~~~~~~

-:meth:`~DataFrame.dropna` dropa rows or columns with missing data.
+:meth:`~DataFrame.dropna` drops rows or columns with missing data.

 .. ipython:: python
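For reference, a minimal sketch of the ``dropna`` behaviour described in the corrected line, using only the public pandas API (the frame below is illustrative):

import numpy as np
import pandas as pd

df = pd.DataFrame({"a": [1.0, np.nan, 3.0], "b": [np.nan, np.nan, 6.0]})

df.dropna()           # drop rows containing any missing value (default)
df.dropna(axis=1)     # drop columns containing any missing value
df.dropna(how="all")  # drop rows only when every value is missing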

doc/source/whatsnew/v3.0.0.rst

Lines changed: 3 additions & 0 deletions
@@ -544,6 +544,7 @@ I/O
 - Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping`` elements. (:issue:`57915`)
 - Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`)
 - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
+- Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. (:issue:`58969`)
 - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
 - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
@@ -595,8 +596,10 @@ Styler
 Other
 ^^^^^
 - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
+- Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`)
 - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
 - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
+- Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`)
 - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
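The GH 58712 entry above corresponds to the ``apply``/numba changes in the files below. A minimal sketch of the fixed behaviour, assuming numba is installed (the frame and the UDF are illustrative):

import pandas as pd

def add_offset(row, offset):
    return row.sum() + offset

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

# Previously args=(10,) was silently ignored on the numba engine;
# with this change the offset reaches the UDF (row sums become 15, 17, 19).
df.apply(add_offset, axis=1, raw=True, engine="numba", args=(10,))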

pandas/core/_numba/executor.py

Lines changed: 7 additions & 5 deletions
@@ -14,17 +14,19 @@

 from pandas.compat._optional import import_optional_dependency

+from pandas.core.util.numba_ import jit_user_function
+

 @functools.cache
 def generate_apply_looper(func, nopython=True, nogil=True, parallel=False):
     if TYPE_CHECKING:
         import numba
     else:
         numba = import_optional_dependency("numba")
-    nb_compat_func = numba.extending.register_jitable(func)
+    nb_compat_func = jit_user_function(func)

     @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
-    def nb_looper(values, axis):
+    def nb_looper(values, axis, *args):
         # Operate on the first row/col in order to get
         # the output shape
         if axis == 0:
@@ -33,7 +35,7 @@ def nb_looper(values, axis):
         else:
             first_elem = values[0]
             dim0 = values.shape[0]
-        res0 = nb_compat_func(first_elem)
+        res0 = nb_compat_func(first_elem, *args)
         # Use np.asarray to get shape for
         # https://github.com/numba/numba/issues/4202#issuecomment-1185981507
         buf_shape = (dim0,) + np.atleast_1d(np.asarray(res0)).shape
@@ -44,11 +46,11 @@ def nb_looper(values, axis):
         if axis == 1:
             buff[0] = res0
             for i in numba.prange(1, values.shape[0]):
-                buff[i] = nb_compat_func(values[i])
+                buff[i] = nb_compat_func(values[i], *args)
         else:
             buff[:, 0] = res0
             for j in numba.prange(1, values.shape[1]):
-                buff[:, j] = nb_compat_func(values[:, j])
+                buff[:, j] = nb_compat_func(values[:, j], *args)
         return buff

     return nb_looper
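The hunk above threads extra positional arguments through the jitted looper so user ``args`` reach the UDF. A standalone sketch of the same pattern in plain numba (not the pandas internals; ``make_looper`` and the UDF are illustrative):

import numba
import numpy as np

def make_looper(func):
    jitted = numba.extending.register_jitable(func)

    @numba.jit(nopython=True)
    def looper(values, *args):
        out = np.empty(values.shape[0])
        for i in range(values.shape[0]):
            # extra positional arguments are forwarded unchanged to the UDF
            out[i] = jitted(values[i], *args)
        return out

    return looper

def row_sum_plus(row, offset):
    return row.sum() + offset

looper = make_looper(row_sum_plus)
looper(np.arange(6.0).reshape(3, 2), 10.0)  # array([11., 15., 19.])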

pandas/core/apply.py

Lines changed: 23 additions & 13 deletions
@@ -51,6 +51,10 @@
 from pandas.core._numba.executor import generate_apply_looper
 import pandas.core.common as com
 from pandas.core.construction import ensure_wrapped_if_datetimelike
+from pandas.core.util.numba_ import (
+    get_jit_arguments,
+    prepare_function_arguments,
+)

 if TYPE_CHECKING:
     from collections.abc import (
@@ -70,7 +74,6 @@
     from pandas.core.resample import Resampler
     from pandas.core.window.rolling import BaseWindow

-
 ResType = dict[int, Any]


@@ -997,17 +1000,20 @@ def wrapper(*args, **kwargs):
             return wrapper

         if engine == "numba":
-            engine_kwargs = {} if engine_kwargs is None else engine_kwargs
-
+            args, kwargs = prepare_function_arguments(
+                self.func,  # type: ignore[arg-type]
+                self.args,
+                self.kwargs,
+            )
             # error: Argument 1 to "__call__" of "_lru_cache_wrapper" has
             # incompatible type "Callable[..., Any] | str | list[Callable
             # [..., Any] | str] | dict[Hashable,Callable[..., Any] | str |
             # list[Callable[..., Any] | str]]"; expected "Hashable"
             nb_looper = generate_apply_looper(
                 self.func,  # type: ignore[arg-type]
-                **engine_kwargs,
+                **get_jit_arguments(engine_kwargs, kwargs),
             )
-            result = nb_looper(self.values, self.axis)
+            result = nb_looper(self.values, self.axis, *args)
             # If we made the result 2-D, squeeze it back to 1-D
             result = np.squeeze(result)
         else:
@@ -1148,21 +1154,23 @@ def generate_numba_apply_func(
         # Currently the parallel argument doesn't get passed through here
         # (it's disabled) since the dicts in numba aren't thread-safe.
         @numba.jit(nogil=nogil, nopython=nopython, parallel=parallel)
-        def numba_func(values, col_names, df_index):
+        def numba_func(values, col_names, df_index, *args):
             results = {}
             for j in range(values.shape[1]):
                 # Create the series
                 ser = Series(
                     values[:, j], index=df_index, name=maybe_cast_str(col_names[j])
                 )
-                results[j] = jitted_udf(ser)
+                results[j] = jitted_udf(ser, *args)
             return results

         return numba_func

     def apply_with_numba(self) -> dict[int, Any]:
+        func = cast(Callable, self.func)
+        args, kwargs = prepare_function_arguments(func, self.args, self.kwargs)
         nb_func = self.generate_numba_apply_func(
-            cast(Callable, self.func), **self.engine_kwargs
+            func, **get_jit_arguments(self.engine_kwargs, kwargs)
         )
         from pandas.core._numba.extensions import set_numba_data

@@ -1177,7 +1185,7 @@ def apply_with_numba(self) -> dict[int, Any]:
         # Convert from numba dict to regular dict
         # Our isinstance checks in the df constructor don't pass for numbas typed dict
         with set_numba_data(index) as index, set_numba_data(columns) as columns:
-            res = dict(nb_func(self.values, columns, index))
+            res = dict(nb_func(self.values, columns, index, *args))
         return res

     @property
@@ -1285,7 +1293,7 @@ def generate_numba_apply_func(
         jitted_udf = numba.extending.register_jitable(func)

         @numba.jit(nogil=nogil, nopython=nopython, parallel=parallel)
-        def numba_func(values, col_names_index, index):
+        def numba_func(values, col_names_index, index, *args):
             results = {}
             # Currently the parallel argument doesn't get passed through here
             # (it's disabled) since the dicts in numba aren't thread-safe.
@@ -1297,15 +1305,17 @@ def numba_func(values, col_names_index, index):
                     index=col_names_index,
                     name=maybe_cast_str(index[i]),
                 )
-                results[i] = jitted_udf(ser)
+                results[i] = jitted_udf(ser, *args)

             return results

         return numba_func

     def apply_with_numba(self) -> dict[int, Any]:
+        func = cast(Callable, self.func)
+        args, kwargs = prepare_function_arguments(func, self.args, self.kwargs)
         nb_func = self.generate_numba_apply_func(
-            cast(Callable, self.func), **self.engine_kwargs
+            func, **get_jit_arguments(self.engine_kwargs, kwargs)
        )

         from pandas.core._numba.extensions import set_numba_data
@@ -1316,7 +1326,7 @@ def apply_with_numba(self) -> dict[int, Any]:
             set_numba_data(self.obj.index) as index,
             set_numba_data(self.columns) as columns,
         ):
-            res = dict(nb_func(self.values, columns, index))
+            res = dict(nb_func(self.values, columns, index, *args))

         return res
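The new ``prepare_function_arguments`` call exists because the jitted functions above only forward a flat ``*args`` tuple, so keyword arguments have to be folded into positional ones before compilation. A rough sketch of that idea only, not the actual helper (``fold_kwargs_into_args`` and ``udf`` are illustrative):

import inspect

def fold_kwargs_into_args(func, args, kwargs):
    # Bind a placeholder for the Series/row argument, then read the bound
    # arguments back in signature order so keywords become positional.
    bound = inspect.signature(func).bind(None, *args, **kwargs)
    bound.apply_defaults()
    return tuple(bound.arguments.values())[1:]

def udf(ser, a, b=2, c=3):
    return ser

fold_kwargs_into_args(udf, (10,), {"c": 30})  # (10, 2, 30)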

pandas/core/arrays/datetimes.py

Lines changed: 26 additions & 0 deletions
@@ -2128,6 +2128,32 @@ def isocalendar(self) -> DataFrame:

         >>> idx.is_year_start
         array([False, False,  True])
+
+        This method, when applied to Series with datetime values under
+        the ``.dt`` accessor, will lose information about Business offsets.
+
+        >>> dates = pd.Series(pd.date_range("2020-10-30", periods=4, freq="BYS"))
+        >>> dates
+        0   2021-01-01
+        1   2022-01-03
+        2   2023-01-02
+        3   2024-01-01
+        dtype: datetime64[ns]
+
+        >>> dates.dt.is_year_start
+        0     True
+        1    False
+        2    False
+        3     True
+        dtype: bool
+
+        >>> idx = pd.date_range("2020-10-30", periods=4, freq="BYS")
+        >>> idx
+        DatetimeIndex(['2021-01-01', '2022-01-03', '2023-01-02', '2024-01-01'],
+                      dtype='datetime64[ns]', freq='BYS-JAN')
+
+        >>> idx.is_year_start
+        array([ True,  True,  True,  True])
         """,
     )
     is_year_end = _field_accessor(

pandas/core/computation/ops.py

Lines changed: 4 additions & 5 deletions
@@ -19,6 +19,7 @@

 from pandas.core.dtypes.common import (
     is_list_like,
+    is_numeric_dtype,
     is_scalar,
 )

@@ -508,10 +509,6 @@ def _disallow_scalar_only_bool_ops(self) -> None:
             raise NotImplementedError("cannot evaluate scalar only bool ops")


-def isnumeric(dtype) -> bool:
-    return issubclass(np.dtype(dtype).type, np.number)
-
-
 class Div(BinOp):
     """
     Div operator to special case casting.
@@ -525,7 +522,9 @@ class Div(BinOp):
     def __init__(self, lhs, rhs) -> None:
         super().__init__("/", lhs, rhs)

-        if not isnumeric(lhs.return_type) or not isnumeric(rhs.return_type):
+        if not is_numeric_dtype(lhs.return_type) or not is_numeric_dtype(
+            rhs.return_type
+        ):
             raise TypeError(
                 f"unsupported operand type(s) for {self.op}: "
                 f"'{lhs.return_type}' and '{rhs.return_type}'"
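Swapping the local ``isnumeric`` helper for ``is_numeric_dtype`` is what lets ``eval`` division accept numeric extension dtypes (GH 58748): ``np.dtype`` cannot interpret an extension dtype such as nullable ``Int64``, while ``is_numeric_dtype`` handles it. A minimal sketch of the user-visible effect (the frame is illustrative):

import pandas as pd
from pandas.api.types import is_numeric_dtype

is_numeric_dtype(pd.Int64Dtype())  # True, even though np.dtype(pd.Int64Dtype()) raises

df = pd.DataFrame(
    {"a": pd.array([1, 2], dtype="Int64"), "b": pd.array([2, 4], dtype="Int64")}
)
df.eval("a / b")  # previously hit the TypeError raised in Div.__init__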
