Commit f330f1a

Merge branch 'main' into daydst2
2 parents cc476fb + 566e592

File tree

19 files changed (+639, -275 lines)

ci/code_checks.sh

Lines changed: 0 additions & 2 deletions
@@ -84,7 +84,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.MultiIndex.get_loc_level PR07" \
         -i "pandas.MultiIndex.levshape SA01" \
         -i "pandas.MultiIndex.names SA01" \
-        -i "pandas.MultiIndex.nlevels SA01" \
         -i "pandas.MultiIndex.remove_unused_levels RT03,SA01" \
         -i "pandas.MultiIndex.reorder_levels RT03,SA01" \
         -i "pandas.MultiIndex.set_levels RT03,SA01" \
@@ -465,7 +464,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.io.stata.StataReader.variable_labels RT03,SA01" \
         -i "pandas.io.stata.StataWriter.write_file SA01" \
         -i "pandas.json_normalize RT03,SA01" \
-        -i "pandas.merge PR07" \
         -i "pandas.merge_asof PR07,RT03" \
         -i "pandas.period_range RT03,SA01" \
         -i "pandas.plotting.andrews_curves RT03,SA01" \

doc/data/titanic.csv

Lines changed: 222 additions & 222 deletions
Large diffs are not rendered by default. (53.1 KB)

doc/source/getting_started/intro_tutorials/01_table_oriented.rst

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@ I want to store passenger data of the Titanic. For a number of passengers, I kno
             "Name": [
                 "Braund, Mr. Owen Harris",
                 "Allen, Mr. William Henry",
-                "Bonnell, Miss. Elizabeth",
+                "Bonnell, Miss Elizabeth",
             ],
             "Age": [22, 35, 58],
             "Sex": ["male", "male", "female"],

doc/source/user_guide/missing_data.rst

Lines changed: 1 addition & 1 deletion
@@ -353,7 +353,7 @@ this behaviour and include NA values in the calculation, use ``skipna=False``.
 Dropping missing data
 ~~~~~~~~~~~~~~~~~~~~~

-:meth:`~DataFrame.dropna` dropa rows or columns with missing data.
+:meth:`~DataFrame.dropna` drops rows or columns with missing data.

 .. ipython:: python
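For reference, a minimal sketch of the ``dropna`` behaviour described in the corrected line, using only the public pandas API (the frame below is illustrative):

import numpy as np
import pandas as pd

df = pd.DataFrame({"a": [1.0, np.nan, 3.0], "b": [np.nan, np.nan, 6.0]})

df.dropna()           # drop rows containing any missing value (default)
df.dropna(axis=1)     # drop columns containing any missing value
df.dropna(how="all")  # drop rows only when every value is missing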

doc/source/whatsnew/v3.0.0.rst

Lines changed: 3 additions & 0 deletions
@@ -544,6 +544,7 @@ I/O
 - Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping`` elements. (:issue:`57915`)
 - Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`)
 - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
+- Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. (:issue:`58969`)
 - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
 - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
@@ -595,8 +596,10 @@ Styler
 Other
 ^^^^^
 - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
+- Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`)
 - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
 - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
+- Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`)
 - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
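The GH 58712 entry above corresponds to the ``apply``/numba changes in the files below. A minimal sketch of the fixed behaviour, assuming numba is installed (the frame and the UDF are illustrative):

import pandas as pd

def add_offset(row, offset):
    return row.sum() + offset

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

# Previously args=(10,) was silently ignored on the numba engine;
# with this change the offset reaches the UDF (row sums become 15, 17, 19).
df.apply(add_offset, axis=1, raw=True, engine="numba", args=(10,))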

pandas/core/_numba/executor.py

Lines changed: 7 additions & 5 deletions
@@ -14,17 +14,19 @@

 from pandas.compat._optional import import_optional_dependency

+from pandas.core.util.numba_ import jit_user_function
+

 @functools.cache
 def generate_apply_looper(func, nopython=True, nogil=True, parallel=False):
     if TYPE_CHECKING:
         import numba
     else:
         numba = import_optional_dependency("numba")
-    nb_compat_func = numba.extending.register_jitable(func)
+    nb_compat_func = jit_user_function(func)

     @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
-    def nb_looper(values, axis):
+    def nb_looper(values, axis, *args):
         # Operate on the first row/col in order to get
         # the output shape
         if axis == 0:
@@ -33,7 +35,7 @@ def nb_looper(values, axis):
         else:
             first_elem = values[0]
             dim0 = values.shape[0]
-        res0 = nb_compat_func(first_elem)
+        res0 = nb_compat_func(first_elem, *args)
         # Use np.asarray to get shape for
         # https://github.com/numba/numba/issues/4202#issuecomment-1185981507
         buf_shape = (dim0,) + np.atleast_1d(np.asarray(res0)).shape
@@ -44,11 +46,11 @@ def nb_looper(values, axis):
         if axis == 1:
             buff[0] = res0
             for i in numba.prange(1, values.shape[0]):
-                buff[i] = nb_compat_func(values[i])
+                buff[i] = nb_compat_func(values[i], *args)
         else:
             buff[:, 0] = res0
             for j in numba.prange(1, values.shape[1]):
-                buff[:, j] = nb_compat_func(values[:, j])
+                buff[:, j] = nb_compat_func(values[:, j], *args)
         return buff

     return nb_looper
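The hunk above threads extra positional arguments through the jitted looper so user ``args`` reach the UDF. A standalone sketch of the same pattern in plain numba (not the pandas internals; ``make_looper`` and the UDF are illustrative):

import numba
import numpy as np

def make_looper(func):
    jitted = numba.extending.register_jitable(func)

    @numba.jit(nopython=True)
    def looper(values, *args):
        out = np.empty(values.shape[0])
        for i in range(values.shape[0]):
            # extra positional arguments are forwarded unchanged to the UDF
            out[i] = jitted(values[i], *args)
        return out

    return looper

def row_sum_plus(row, offset):
    return row.sum() + offset

looper = make_looper(row_sum_plus)
looper(np.arange(6.0).reshape(3, 2), 10.0)  # array([11., 15., 19.])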

pandas/core/apply.py

Lines changed: 23 additions & 13 deletions
@@ -51,6 +51,10 @@
 from pandas.core._numba.executor import generate_apply_looper
 import pandas.core.common as com
 from pandas.core.construction import ensure_wrapped_if_datetimelike
+from pandas.core.util.numba_ import (
+    get_jit_arguments,
+    prepare_function_arguments,
+)

 if TYPE_CHECKING:
     from collections.abc import (
@@ -70,7 +74,6 @@
     from pandas.core.resample import Resampler
     from pandas.core.window.rolling import BaseWindow

-
 ResType = dict[int, Any]


@@ -997,17 +1000,20 @@ def wrapper(*args, **kwargs):
             return wrapper

         if engine == "numba":
-            engine_kwargs = {} if engine_kwargs is None else engine_kwargs
-
+            args, kwargs = prepare_function_arguments(
+                self.func,  # type: ignore[arg-type]
+                self.args,
+                self.kwargs,
+            )
             # error: Argument 1 to "__call__" of "_lru_cache_wrapper" has
             # incompatible type "Callable[..., Any] | str | list[Callable
             # [..., Any] | str] | dict[Hashable,Callable[..., Any] | str |
             # list[Callable[..., Any] | str]]"; expected "Hashable"
             nb_looper = generate_apply_looper(
                 self.func,  # type: ignore[arg-type]
-                **engine_kwargs,
+                **get_jit_arguments(engine_kwargs, kwargs),
             )
-            result = nb_looper(self.values, self.axis)
+            result = nb_looper(self.values, self.axis, *args)
             # If we made the result 2-D, squeeze it back to 1-D
             result = np.squeeze(result)
         else:
@@ -1148,21 +1154,23 @@ def generate_numba_apply_func(
         # Currently the parallel argument doesn't get passed through here
         # (it's disabled) since the dicts in numba aren't thread-safe.
         @numba.jit(nogil=nogil, nopython=nopython, parallel=parallel)
-        def numba_func(values, col_names, df_index):
+        def numba_func(values, col_names, df_index, *args):
             results = {}
             for j in range(values.shape[1]):
                 # Create the series
                 ser = Series(
                     values[:, j], index=df_index, name=maybe_cast_str(col_names[j])
                 )
-                results[j] = jitted_udf(ser)
+                results[j] = jitted_udf(ser, *args)
             return results

         return numba_func

     def apply_with_numba(self) -> dict[int, Any]:
+        func = cast(Callable, self.func)
+        args, kwargs = prepare_function_arguments(func, self.args, self.kwargs)
         nb_func = self.generate_numba_apply_func(
-            cast(Callable, self.func), **self.engine_kwargs
+            func, **get_jit_arguments(self.engine_kwargs, kwargs)
         )
         from pandas.core._numba.extensions import set_numba_data

@@ -1177,7 +1185,7 @@ def apply_with_numba(self) -> dict[int, Any]:
         # Convert from numba dict to regular dict
         # Our isinstance checks in the df constructor don't pass for numbas typed dict
         with set_numba_data(index) as index, set_numba_data(columns) as columns:
-            res = dict(nb_func(self.values, columns, index))
+            res = dict(nb_func(self.values, columns, index, *args))
         return res

     @property
@@ -1285,7 +1293,7 @@ def generate_numba_apply_func(
         jitted_udf = numba.extending.register_jitable(func)

         @numba.jit(nogil=nogil, nopython=nopython, parallel=parallel)
-        def numba_func(values, col_names_index, index):
+        def numba_func(values, col_names_index, index, *args):
             results = {}
             # Currently the parallel argument doesn't get passed through here
             # (it's disabled) since the dicts in numba aren't thread-safe.
@@ -1297,15 +1305,17 @@ def numba_func(values, col_names_index, index):
                     index=col_names_index,
                     name=maybe_cast_str(index[i]),
                 )
-                results[i] = jitted_udf(ser)
+                results[i] = jitted_udf(ser, *args)

             return results

         return numba_func

     def apply_with_numba(self) -> dict[int, Any]:
+        func = cast(Callable, self.func)
+        args, kwargs = prepare_function_arguments(func, self.args, self.kwargs)
         nb_func = self.generate_numba_apply_func(
-            cast(Callable, self.func), **self.engine_kwargs
+            func, **get_jit_arguments(self.engine_kwargs, kwargs)
        )

         from pandas.core._numba.extensions import set_numba_data
@@ -1316,7 +1326,7 @@ def apply_with_numba(self) -> dict[int, Any]:
             set_numba_data(self.obj.index) as index,
             set_numba_data(self.columns) as columns,
         ):
-            res = dict(nb_func(self.values, columns, index))
+            res = dict(nb_func(self.values, columns, index, *args))

         return res
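The new ``prepare_function_arguments`` call exists because the jitted functions above only forward a flat ``*args`` tuple, so keyword arguments have to be folded into positional ones before compilation. A rough sketch of that idea only, not the actual helper (``fold_kwargs_into_args`` and ``udf`` are illustrative):

import inspect

def fold_kwargs_into_args(func, args, kwargs):
    # Bind a placeholder for the Series/row argument, then read the bound
    # arguments back in signature order so keywords become positional.
    bound = inspect.signature(func).bind(None, *args, **kwargs)
    bound.apply_defaults()
    return tuple(bound.arguments.values())[1:]

def udf(ser, a, b=2, c=3):
    return ser

fold_kwargs_into_args(udf, (10,), {"c": 30})  # (10, 2, 30)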

pandas/core/arrays/datetimes.py

Lines changed: 26 additions & 0 deletions
@@ -2128,6 +2128,32 @@ def isocalendar(self) -> DataFrame:

         >>> idx.is_year_start
         array([False, False,  True])
+
+        This method, when applied to Series with datetime values under
+        the ``.dt`` accessor, will lose information about Business offsets.
+
+        >>> dates = pd.Series(pd.date_range("2020-10-30", periods=4, freq="BYS"))
+        >>> dates
+        0   2021-01-01
+        1   2022-01-03
+        2   2023-01-02
+        3   2024-01-01
+        dtype: datetime64[ns]
+
+        >>> dates.dt.is_year_start
+        0     True
+        1    False
+        2    False
+        3     True
+        dtype: bool
+
+        >>> idx = pd.date_range("2020-10-30", periods=4, freq="BYS")
+        >>> idx
+        DatetimeIndex(['2021-01-01', '2022-01-03', '2023-01-02', '2024-01-01'],
+                      dtype='datetime64[ns]', freq='BYS-JAN')
+
+        >>> idx.is_year_start
+        array([ True,  True,  True,  True])
         """,
     )
     is_year_end = _field_accessor(

pandas/core/computation/ops.py

Lines changed: 4 additions & 5 deletions
@@ -19,6 +19,7 @@

 from pandas.core.dtypes.common import (
     is_list_like,
+    is_numeric_dtype,
     is_scalar,
 )

@@ -508,10 +509,6 @@ def _disallow_scalar_only_bool_ops(self) -> None:
             raise NotImplementedError("cannot evaluate scalar only bool ops")


-def isnumeric(dtype) -> bool:
-    return issubclass(np.dtype(dtype).type, np.number)
-
-
 class Div(BinOp):
     """
     Div operator to special case casting.
@@ -525,7 +522,9 @@ class Div(BinOp):
     def __init__(self, lhs, rhs) -> None:
         super().__init__("/", lhs, rhs)

-        if not isnumeric(lhs.return_type) or not isnumeric(rhs.return_type):
+        if not is_numeric_dtype(lhs.return_type) or not is_numeric_dtype(
+            rhs.return_type
+        ):
             raise TypeError(
                 f"unsupported operand type(s) for {self.op}: "
                 f"'{lhs.return_type}' and '{rhs.return_type}'"
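Swapping the local ``isnumeric`` helper for ``is_numeric_dtype`` is what lets ``eval`` division accept numeric extension dtypes (GH 58748): ``np.dtype`` cannot interpret an extension dtype such as nullable ``Int64``, while ``is_numeric_dtype`` handles it. A minimal sketch of the user-visible effect (the frame is illustrative):

import pandas as pd
from pandas.api.types import is_numeric_dtype

is_numeric_dtype(pd.Int64Dtype())  # True, even though np.dtype(pd.Int64Dtype()) raises

df = pd.DataFrame(
    {"a": pd.array([1, 2], dtype="Int64"), "b": pd.array([2, 4], dtype="Int64")}
)
df.eval("a / b")  # previously hit the TypeError raised in Div.__init__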
