From 2fa5d5d32cf5f726a23fb059b769f4739d8c7deb Mon Sep 17 00:00:00 2001 From: nevrohelios Date: Sun, 24 Aug 2025 00:14:12 +0530 Subject: [PATCH 1/5] REF: use _cast_pointwise_result in map --- pandas/core/arrays/arrow/array.py | 3 ++- pandas/core/arrays/base.py | 3 ++- pandas/core/arrays/masked.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 221191773186e..fa0eaa5c05279 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1594,7 +1594,8 @@ def to_numpy( def map(self, mapper, na_action: Literal["ignore"] | None = None): if is_numeric_dtype(self.dtype): - return map_array(self.to_numpy(), mapper, na_action=na_action) + result = map_array(self.to_numpy(), mapper, na_action=na_action) + return self._cast_pointwise_result(result) else: return super().map(mapper, na_action) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 1cd10a9eef9d1..1c5e7881c215c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -2530,7 +2530,8 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None): If the function returns a tuple with more than one element a MultiIndex will be returned. 
""" - return map_array(self, mapper, na_action=na_action) + results = map_array(self, mapper, na_action=na_action) + return self._cast_pointwise_result(results) # ------------------------------------------------------------------------ # GroupBy Methods diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 0402452e484ea..e56ca7b0fed39 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1395,7 +1395,8 @@ def max(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): return self._wrap_reduction_result("max", result, skipna=skipna, axis=axis) def map(self, mapper, na_action: Literal["ignore"] | None = None): - return map_array(self.to_numpy(), mapper, na_action=na_action) + result = map_array(self.to_numpy(), mapper, na_action=na_action) + return self._cast_pointwise_result(result) @overload def any( From f8f579fdd7211ac7b52595fe290ab6dc90e94bac Mon Sep 17 00:00:00 2001 From: nevrohelios Date: Tue, 9 Sep 2025 01:44:12 +0530 Subject: [PATCH 2/5] added preserve_dtype param --- .gitignore | 3 +++ doc/source/reference/extensions.rst | 1 + pandas/core/arrays/arrow/array.py | 10 +++++++--- pandas/core/arrays/base.py | 14 ++++++++++++-- pandas/core/arrays/masked.py | 9 +++++++-- 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index d951f3fb9cbad..84d761ac112ce 100644 --- a/.gitignore +++ b/.gitignore @@ -141,3 +141,6 @@ doc/source/savefig/ # Pyodide/WASM related files # ############################## /.pyodide-xbuildenv-* + +local.py +.venv/ \ No newline at end of file diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index e412793a328a3..1f5d19d004a4c 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -58,6 +58,7 @@ objects. 
api.extensions.ExtensionArray.isin api.extensions.ExtensionArray.isna api.extensions.ExtensionArray.ravel + api.extensions.ExtensionArray.map api.extensions.ExtensionArray.repeat api.extensions.ExtensionArray.searchsorted api.extensions.ExtensionArray.shift diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index fa0eaa5c05279..947a5e73a8a71 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1592,12 +1592,16 @@ def to_numpy( result[~mask] = data[~mask]._pa_array.to_numpy() return result - def map(self, mapper, na_action: Literal["ignore"] | None = None): + def map(self, mapper, + na_action: Literal["ignore"] | None = None, + preserve_dtype: bool = False): if is_numeric_dtype(self.dtype): result = map_array(self.to_numpy(), mapper, na_action=na_action) - return self._cast_pointwise_result(result) + if preserve_dtype: + result = self._cast_pointwise_result(result) + return result else: - return super().map(mapper, na_action) + return super().map(mapper, na_action, preserve_dtype=preserve_dtype) @doc(ExtensionArray.duplicated) def duplicated( diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 1c5e7881c215c..abd5407c0a82b 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -2510,7 +2510,9 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs) - def map(self, mapper, na_action: Literal["ignore"] | None = None): + def map(self, mapper, + na_action: Literal["ignore"] | None = None, + preserve_dtype: bool = False): """ Map values using an input mapping or function. @@ -2522,6 +2524,12 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None): If 'ignore', propagate NA values, without passing them to the mapping correspondence. If 'ignore' is not supported, a ``NotImplementedError`` should be raised. 
+ preserve_dtype : bool, default False + If True, attempt to cast the elementwise result back to the + original ExtensionArray type (and dtype) when possible. This is + primarily intended for identity or dtype-preserving mappings. + If False, the result of the mapping is returned as produced by + the underlying implementation (typically a NumPy ndarray). Returns ------- @@ -2531,7 +2539,9 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None): a MultiIndex will be returned. """ results = map_array(self, mapper, na_action=na_action) - return self._cast_pointwise_result(results) + if preserve_dtype: + results = self._cast_pointwise_result(results) + return results # ------------------------------------------------------------------------ # GroupBy Methods diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index e56ca7b0fed39..0f4b191cf283e 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1394,9 +1394,14 @@ def max(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): ) return self._wrap_reduction_result("max", result, skipna=skipna, axis=axis) - def map(self, mapper, na_action: Literal["ignore"] | None = None): + def map(self, mapper, + na_action: Literal["ignore"] | None = None, + preserve_dtype: bool = False): + """See ExtensionArray.map.""" result = map_array(self.to_numpy(), mapper, na_action=na_action) - return self._cast_pointwise_result(result) + if preserve_dtype: + result = self._cast_pointwise_result(result) + return result @overload def any( From 494f1b0d469361c0f91d5d532f5488ac9b32cf7f Mon Sep 17 00:00:00 2001 From: nevrohelios Date: Tue, 9 Sep 2025 09:43:26 +0530 Subject: [PATCH 3/5] format --- .gitignore | 2 +- doc/source/reference/extensions.rst | 2 +- pandas/core/arrays/base.py | 9 ++++++--- pandas/core/arrays/masked.py | 9 ++++++--- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 84d761ac112ce..03f035db527fe 100644 --- 
a/.gitignore +++ b/.gitignore @@ -143,4 +143,4 @@ doc/source/savefig/ /.pyodide-xbuildenv-* local.py -.venv/ \ No newline at end of file +.venv/ diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index 1f5d19d004a4c..ff93a3fd25104 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -58,7 +58,7 @@ objects. api.extensions.ExtensionArray.isin api.extensions.ExtensionArray.isna api.extensions.ExtensionArray.ravel - api.extensions.ExtensionArray.map + api.extensions.ExtensionArray.map api.extensions.ExtensionArray.repeat api.extensions.ExtensionArray.searchsorted api.extensions.ExtensionArray.shift diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index a9a8563ca5eeb..c37ee6c98079c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -2516,9 +2516,12 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs) - def map(self, mapper, - na_action: Literal["ignore"] | None = None, - preserve_dtype: bool = False): + def map( + self, + mapper, + na_action: Literal["ignore"] | None = None, + preserve_dtype: bool = False, + ): """ Map values using an input mapping or function. 
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index ff7d3d41c2052..9d0ee3f833218 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1395,9 +1395,12 @@ def max(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): ) return self._wrap_reduction_result("max", result, skipna=skipna, axis=axis) - def map(self, mapper, - na_action: Literal["ignore"] | None = None, - preserve_dtype: bool = False): + def map( + self, + mapper, + na_action: Literal["ignore"] | None = None, + preserve_dtype: bool = False, + ): """See ExtensionArray.map.""" result = map_array(self.to_numpy(), mapper, na_action=na_action) if preserve_dtype: From ca24177547fa07473b09a2ec5809c8f96e6b863f Mon Sep 17 00:00:00 2001 From: nevrohelios Date: Mon, 22 Sep 2025 22:05:48 +0530 Subject: [PATCH 4/5] ruff+mypy format --- pandas/core/arrays/arrow/array.py | 9 ++++++--- pandas/core/arrays/categorical.py | 4 ++++ pandas/core/arrays/datetimelike.py | 8 +++++++- pandas/core/arrays/sparse/array.py | 9 ++++++++- pandas/core/groupby/groupby.py | 2 +- pandas/core/groupby/indexing.py | 2 +- pandas/io/html.py | 4 ++-- pandas/tests/frame/test_query_eval.py | 24 +++++++----------------- 8 files changed, 36 insertions(+), 26 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index bb6a19536f427..ad4a43839eb2f 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1612,9 +1612,12 @@ def to_numpy( result[~mask] = data[~mask]._pa_array.to_numpy() return result - def map(self, mapper, - na_action: Literal["ignore"] | None = None, - preserve_dtype: bool = False): + def map( + self, + mapper, + na_action: Literal["ignore"] | None = None, + preserve_dtype: bool = False, + ): if is_numeric_dtype(self.dtype): result = map_array(self.to_numpy(), mapper, na_action=na_action) if preserve_dtype: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 
4b5d2acf008a8..2723f628691bb 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1514,6 +1514,7 @@ def map( self, mapper, na_action: Literal["ignore"] | None = None, + preserve_dtype: bool = True, ): """ Map categories using an input mapping or function. @@ -1535,6 +1536,9 @@ def map( If 'ignore', propagate NaN values, without passing them to the mapping correspondence. + preserve_dtype : bool, default True + Please safely ignore this parameter. + Returns ------- pandas.Categorical or pandas.Index diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 561bf7d42d289..6cee4e87fc57f 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -742,7 +742,13 @@ def _unbox(self, other) -> np.int64 | np.datetime64 | np.timedelta64 | np.ndarra # pandas assumes they're there. @ravel_compat - def map(self, mapper, na_action: Literal["ignore"] | None = None): + def map( + self, + mapper, + na_action: Literal["ignore"] | None = None, + preserve_type: bool = True, + ): + """Safely ignore the `preserve_type` parameter""" from pandas import Index result = map_array(self, mapper, na_action=na_action) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index ab5569537dc55..efc95b7e8f4ae 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1326,7 +1326,12 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True): return self._simple_new(sp_values, self.sp_index, dtype) - def map(self, mapper, na_action: Literal["ignore"] | None = None) -> Self: + def map( + self, + mapper, + na_action: Literal["ignore"] | None = None, + preserve_dtype: bool = True, + ) -> Self: """ Map categories using an input mapping or function. 
@@ -1337,6 +1342,8 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None) -> Self: na_action : {None, 'ignore'}, default None If 'ignore', propagate NA values, without passing them to the mapping correspondence. + preserve_dtype : bool, default True + Please safely ignore this parameter. Returns ------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f9789c82a1536..7f6cc7f660204 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4321,7 +4321,7 @@ def nth(self) -> GroupByNthSelector: def _nth( self, n: PositionalIndexer | tuple, - dropna: Literal["any", "all", None] = None, + dropna: Literal["any", "all"] | None = None, ) -> NDFrameT: if not dropna: mask = self._make_mask_from_positional_indexer(n) diff --git a/pandas/core/groupby/indexing.py b/pandas/core/groupby/indexing.py index c658f625d5ea9..dc9cf9bcd1e64 100644 --- a/pandas/core/groupby/indexing.py +++ b/pandas/core/groupby/indexing.py @@ -296,7 +296,7 @@ def __init__(self, groupby_object: groupby.GroupBy) -> None: def __call__( self, n: PositionalIndexer | tuple, - dropna: Literal["any", "all", None] = None, + dropna: Literal["any", "all"] | None = None, ) -> DataFrame | Series: return self.groupby_object._nth(n, dropna) diff --git a/pandas/io/html.py b/pandas/io/html.py index 183af3a03221b..2f7f615596469 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -220,7 +220,7 @@ def __init__( attrs: dict[str, str] | None, encoding: str, displayed_only: bool, - extract_links: Literal[None, "header", "footer", "body", "all"], + extract_links: Literal["header", "footer", "body", "all"] | None, storage_options: StorageOptions = None, ) -> None: self.io = io @@ -1042,7 +1042,7 @@ def read_html( na_values: Iterable[object] | None = None, keep_default_na: bool = True, displayed_only: bool = True, - extract_links: Literal[None, "header", "footer", "body", "all"] = None, + extract_links: Literal["header", "footer", "body", "all"] | None 
= None, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, storage_options: StorageOptions = None, ) -> list[DataFrame]: diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index f93105498ac79..b31e8529b238b 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -160,21 +160,13 @@ def test_query_empty_string(self): df.query("") def test_query_duplicate_column_name(self, engine, parser): - df = DataFrame( - { - "A": range(3), - "B": range(3), - "C": range(3) - } - ).rename(columns={"B": "A"}) + df = DataFrame({"A": range(3), "B": range(3), "C": range(3)}).rename( + columns={"B": "A"} + ) - res = df.query('C == 1', engine=engine, parser=parser) + res = df.query("C == 1", engine=engine, parser=parser) - expect = DataFrame( - [[1, 1, 1]], - columns=["A", "A", "C"], - index=[1] - ) + expect = DataFrame([[1, 1, 1]], columns=["A", "A", "C"], index=[1]) tm.assert_frame_equal(res, expect) @@ -1140,9 +1132,7 @@ def test_query_with_nested_special_character(self, parser, engine): [">=", operator.ge], ], ) - def test_query_lex_compare_strings( - self, parser, engine, op, func - ): + def test_query_lex_compare_strings(self, parser, engine, op, func): a = Series(np.random.default_rng(2).choice(list("abcde"), 20)) b = Series(np.arange(a.size)) df = DataFrame({"X": a, "Y": b}) @@ -1411,7 +1401,7 @@ def test_expr_with_column_name_with_backtick_and_hash(self): def test_expr_with_column_name_with_backtick(self): # GH 59285 df = DataFrame({"a`b": (1, 2, 3), "ab": (4, 5, 6)}) - result = df.query("`a``b` < 2") # noqa + result = df.query("`a``b` < 2") # Note: Formatting checks may wrongly consider the above ``inline code``. 
expected = df[df["a`b"] < 2] tm.assert_frame_equal(result, expected) From ab7da79d0c799338d6d51f0c532c9ddbeb09a85e Mon Sep 17 00:00:00 2001 From: nevrohelios Date: Mon, 22 Sep 2025 23:55:32 +0530 Subject: [PATCH 5/5] DOC: Remove api.extensions.ExtensionArray.map from autosummary --- .gitignore | 1 - doc/source/reference/extensions.rst | 1 - 2 files changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index 03f035db527fe..d1378f397b763 100644 --- a/.gitignore +++ b/.gitignore @@ -142,5 +142,4 @@ doc/source/savefig/ ############################## /.pyodide-xbuildenv-* -local.py .venv/ diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index ff93a3fd25104..e412793a328a3 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -58,7 +58,6 @@ objects. api.extensions.ExtensionArray.isin api.extensions.ExtensionArray.isna api.extensions.ExtensionArray.ravel - api.extensions.ExtensionArray.map api.extensions.ExtensionArray.repeat api.extensions.ExtensionArray.searchsorted api.extensions.ExtensionArray.shift