diff --git a/.gitignore b/.gitignore index d951f3fb9cbad..d1378f397b763 100644 --- a/.gitignore +++ b/.gitignore @@ -141,3 +141,5 @@ doc/source/savefig/ # Pyodide/WASM related files # ############################## /.pyodide-xbuildenv-* + +.venv/ diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 653a900fbfe45..ad4a43839eb2f 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1612,15 +1612,23 @@ def to_numpy( result[~mask] = data[~mask]._pa_array.to_numpy() return result - def map(self, mapper, na_action: Literal["ignore"] | None = None): + def map( + self, + mapper, + na_action: Literal["ignore"] | None = None, + preserve_dtype: bool = False, + ): if is_numeric_dtype(self.dtype): - return map_array(self.to_numpy(), mapper, na_action=na_action) + result = map_array(self.to_numpy(), mapper, na_action=na_action) + if preserve_dtype: + result = self._cast_pointwise_result(result) + return result else: # For "mM" cases, the super() method passes `self` without the # to_numpy call, which inside map_array casts to ndarray[object]. # Without the to_numpy() call, NA is preserved instead of changed # to None. - return super().map(mapper, na_action) + return super().map(mapper, na_action, preserve_dtype=preserve_dtype) @doc(ExtensionArray.duplicated) def duplicated( diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 84ec38e2f75d1..b9e304b31744d 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -2538,7 +2538,12 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs) - def map(self, mapper, na_action: Literal["ignore"] | None = None): + def map( + self, + mapper, + na_action: Literal["ignore"] | None = None, + preserve_dtype: bool = False, + ): """ Map values using an input mapping or function. 
@@ -2550,6 +2555,12 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None): If 'ignore', propagate NA values, without passing them to the mapping correspondence. If 'ignore' is not supported, a ``NotImplementedError`` should be raised. + preserve_dtype : bool, default False + If True, attempt to cast the elementwise result back to the + original ExtensionArray type (and dtype) when possible. This is + primarily intended for identity or dtype-preserving mappings. + If False, the result of the mapping is returned as produced by + the underlying implementation (typically a NumPy ndarray). Returns ------- @@ -2558,7 +2569,10 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None): If the function returns a tuple with more than one element a MultiIndex will be returned. """ - return map_array(self, mapper, na_action=na_action) + results = map_array(self, mapper, na_action=na_action) + if preserve_dtype: + results = self._cast_pointwise_result(results) + return results # ------------------------------------------------------------------------ # GroupBy Methods diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 4b5d2acf008a8..2723f628691bb 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1514,6 +1514,7 @@ def map( self, mapper, na_action: Literal["ignore"] | None = None, + preserve_dtype: bool = True, ): """ Map categories using an input mapping or function. @@ -1535,6 +1536,9 @@ def map( If 'ignore', propagate NaN values, without passing them to the mapping correspondence. + preserve_dtype : bool, default True + Accepted for consistency with ``ExtensionArray.map``; has no effect here. 
+ Returns ------- pandas.Categorical or pandas.Index diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index b93d1ae408400..40d10a47c19da 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -742,7 +742,13 @@ def _unbox(self, other) -> np.int64 | np.datetime64 | np.timedelta64 | np.ndarra # pandas assumes they're there. @ravel_compat - def map(self, mapper, na_action: Literal["ignore"] | None = None): + def map( + self, + mapper, + na_action: Literal["ignore"] | None = None, + preserve_dtype: bool = True, + ): + """See ExtensionArray.map; `preserve_dtype` is accepted but ignored.""" from pandas import Index result = map_array(self, mapper, na_action=na_action) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index bddca5bed6ff8..af468ad0e75d1 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1397,8 +1397,17 @@ def max(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): ) return self._wrap_reduction_result("max", result, skipna=skipna, axis=axis) - def map(self, mapper, na_action: Literal["ignore"] | None = None): - return map_array(self.to_numpy(), mapper, na_action=na_action) + def map( + self, + mapper, + na_action: Literal["ignore"] | None = None, + preserve_dtype: bool = False, + ): + """See ExtensionArray.map.""" + result = map_array(self.to_numpy(), mapper, na_action=na_action) + if preserve_dtype: + result = self._cast_pointwise_result(result) + return result @overload def any( diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index c04f3716f4739..9d04de69788fe 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1337,7 +1337,12 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True): return self._simple_new(sp_values, self.sp_index, dtype) - def map(self, mapper, na_action: Literal["ignore"] | None = None) -> Self: + def map( + self, + mapper, 
+ na_action: Literal["ignore"] | None = None, + preserve_dtype: bool = True, + ) -> Self: """ Map categories using an input mapping or function. @@ -1348,6 +1353,8 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None) -> Self: na_action : {None, 'ignore'}, default None If 'ignore', propagate NA values, without passing them to the mapping correspondence. + preserve_dtype : bool, default True + Accepted for consistency with ``ExtensionArray.map``; has no effect here. Returns ------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f9789c82a1536..7f6cc7f660204 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4321,7 +4321,7 @@ def nth(self) -> GroupByNthSelector: def _nth( self, n: PositionalIndexer | tuple, - dropna: Literal["any", "all", None] = None, + dropna: Literal["any", "all"] | None = None, ) -> NDFrameT: if not dropna: mask = self._make_mask_from_positional_indexer(n) diff --git a/pandas/core/groupby/indexing.py b/pandas/core/groupby/indexing.py index c658f625d5ea9..dc9cf9bcd1e64 100644 --- a/pandas/core/groupby/indexing.py +++ b/pandas/core/groupby/indexing.py @@ -296,7 +296,7 @@ def __init__(self, groupby_object: groupby.GroupBy) -> None: def __call__( self, n: PositionalIndexer | tuple, - dropna: Literal["any", "all", None] = None, + dropna: Literal["any", "all"] | None = None, ) -> DataFrame | Series: return self.groupby_object._nth(n, dropna) diff --git a/pandas/io/html.py b/pandas/io/html.py index 183af3a03221b..2f7f615596469 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -220,7 +220,7 @@ def __init__( attrs: dict[str, str] | None, encoding: str, displayed_only: bool, - extract_links: Literal[None, "header", "footer", "body", "all"], + extract_links: Literal["header", "footer", "body", "all"] | None, storage_options: StorageOptions = None, ) -> None: self.io = io @@ -1042,7 +1042,7 @@ def read_html( na_values: Iterable[object] | None = None, keep_default_na: bool = True, displayed_only: bool = 
True, - extract_links: Literal[None, "header", "footer", "body", "all"] = None, + extract_links: Literal["header", "footer", "body", "all"] | None = None, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, storage_options: StorageOptions = None, ) -> list[DataFrame]: diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index b599be5d042fe..b31e8529b238b 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -160,21 +160,13 @@ def test_query_empty_string(self): df.query("") def test_query_duplicate_column_name(self, engine, parser): - df = DataFrame( - { - "A": range(3), - "B": range(3), - "C": range(3) - } - ).rename(columns={"B": "A"}) + df = DataFrame({"A": range(3), "B": range(3), "C": range(3)}).rename( + columns={"B": "A"} + ) res = df.query("C == 1", engine=engine, parser=parser) - expect = DataFrame( - [[1, 1, 1]], - columns=["A", "A", "C"], - index=[1] - ) + expect = DataFrame([[1, 1, 1]], columns=["A", "A", "C"], index=[1]) tm.assert_frame_equal(res, expect) @@ -1140,9 +1132,7 @@ def test_query_with_nested_special_character(self, parser, engine): [">=", operator.ge], ], ) - def test_query_lex_compare_strings( - self, parser, engine, op, func - ): + def test_query_lex_compare_strings(self, parser, engine, op, func): a = Series(np.random.default_rng(2).choice(list("abcde"), 20)) b = Series(np.arange(a.size)) df = DataFrame({"X": a, "Y": b})