Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,5 @@ doc/source/savefig/
# Pyodide/WASM related files #
##############################
/.pyodide-xbuildenv-*

.venv/
14 changes: 11 additions & 3 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1612,15 +1612,23 @@ def to_numpy(
result[~mask] = data[~mask]._pa_array.to_numpy()
return result

def map(self, mapper, na_action: Literal["ignore"] | None = None):
def map(
self,
mapper,
na_action: Literal["ignore"] | None = None,
preserve_dtype: bool = False,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't want this new keyword.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the current implementation does not work for all types, do you have a suggestion for how I should approach this without introducing the new keyword?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as I said here: #62177 (comment)

):
if is_numeric_dtype(self.dtype):
return map_array(self.to_numpy(), mapper, na_action=na_action)
result = map_array(self.to_numpy(), mapper, na_action=na_action)
if preserve_dtype:
result = self._cast_pointwise_result(result)
return result
else:
# For "mM" cases, the super() method passes `self` without the
# to_numpy call, which inside map_array casts to ndarray[object].
# Without the to_numpy() call, NA is preserved instead of changed
# to None.
return super().map(mapper, na_action)
return super().map(mapper, na_action, preserve_dtype=preserve_dtype)

@doc(ExtensionArray.duplicated)
def duplicated(
Expand Down
18 changes: 16 additions & 2 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2538,7 +2538,12 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):

return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)

def map(self, mapper, na_action: Literal["ignore"] | None = None):
def map(
self,
mapper,
na_action: Literal["ignore"] | None = None,
preserve_dtype: bool = False,
):
"""
Map values using an input mapping or function.

Expand All @@ -2550,6 +2555,12 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None):
If 'ignore', propagate NA values, without passing them to the
mapping correspondence. If 'ignore' is not supported, a
``NotImplementedError`` should be raised.
preserve_dtype : bool, default False
If True, attempt to cast the elementwise result back to the
original ExtensionArray type (and dtype) when possible. This is
primarily intended for identity or dtype-preserving mappings.
If False, the result of the mapping is returned as produced by
the underlying implementation (typically a NumPy ndarray).

Returns
-------
Expand All @@ -2558,7 +2569,10 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None):
If the function returns a tuple with more than one element
a MultiIndex will be returned.
"""
return map_array(self, mapper, na_action=na_action)
results = map_array(self, mapper, na_action=na_action)
if preserve_dtype:
results = self._cast_pointwise_result(results)
return results

# ------------------------------------------------------------------------
# GroupBy Methods
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1514,6 +1514,7 @@ def map(
self,
mapper,
na_action: Literal["ignore"] | None = None,
preserve_dtype: bool = True,
):
"""
Map categories using an input mapping or function.
Expand All @@ -1535,6 +1536,9 @@ def map(
If 'ignore', propagate NaN values, without passing them to the
mapping correspondence.

preserve_dtype : bool, default True
    Present for signature compatibility with ``ExtensionArray.map``;
    this implementation always attempts to preserve the categorical dtype.

Returns
-------
pandas.Categorical or pandas.Index
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,13 @@ def _unbox(self, other) -> np.int64 | np.datetime64 | np.timedelta64 | np.ndarra
# pandas assumes they're there.

@ravel_compat
def map(self, mapper, na_action: Literal["ignore"] | None = None):
def map(
self,
mapper,
na_action: Literal["ignore"] | None = None,
preserve_dtype: bool = True,
):
"""Safely ignore the `preserve_type` parameter"""
from pandas import Index

result = map_array(self, mapper, na_action=na_action)
Expand Down
13 changes: 11 additions & 2 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -1397,8 +1397,17 @@ def max(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs):
)
return self._wrap_reduction_result("max", result, skipna=skipna, axis=axis)

def map(self, mapper, na_action: Literal["ignore"] | None = None):
return map_array(self.to_numpy(), mapper, na_action=na_action)
def map(
self,
mapper,
na_action: Literal["ignore"] | None = None,
preserve_dtype: bool = False,
):
"""See ExtensionArray.map."""
result = map_array(self.to_numpy(), mapper, na_action=na_action)
if preserve_dtype:
result = self._cast_pointwise_result(result)
return result

@overload
def any(
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1337,7 +1337,12 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True):

return self._simple_new(sp_values, self.sp_index, dtype)

def map(self, mapper, na_action: Literal["ignore"] | None = None) -> Self:
def map(
self,
mapper,
na_action: Literal["ignore"] | None = None,
preserve_dtype: bool = True,
) -> Self:
"""
Map categories using an input mapping or function.

Expand All @@ -1348,6 +1353,8 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None) -> Self:
na_action : {None, 'ignore'}, default None
If 'ignore', propagate NA values, without passing them to the
mapping correspondence.
preserve_dtype : bool, default True
    Present for signature compatibility with ``ExtensionArray.map``;
    this implementation always returns a ``SparseArray`` of the same subtype.

Returns
-------
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -4321,7 +4321,7 @@ def nth(self) -> GroupByNthSelector:
def _nth(
self,
n: PositionalIndexer | tuple,
dropna: Literal["any", "all", None] = None,
dropna: Literal["any", "all"] | None = None,
) -> NDFrameT:
if not dropna:
mask = self._make_mask_from_positional_indexer(n)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def __init__(self, groupby_object: groupby.GroupBy) -> None:
def __call__(
self,
n: PositionalIndexer | tuple,
dropna: Literal["any", "all", None] = None,
dropna: Literal["any", "all"] | None = None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pls avoid unrelated changes

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry — a CI formatting check made me run ruff, and it changed a few files. Thanks for pointing that out; I will revert those changes.

) -> DataFrame | Series:
return self.groupby_object._nth(n, dropna)

Expand Down
4 changes: 2 additions & 2 deletions pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def __init__(
attrs: dict[str, str] | None,
encoding: str,
displayed_only: bool,
extract_links: Literal[None, "header", "footer", "body", "all"],
extract_links: Literal["header", "footer", "body", "all"] | None,
storage_options: StorageOptions = None,
) -> None:
self.io = io
Expand Down Expand Up @@ -1042,7 +1042,7 @@ def read_html(
na_values: Iterable[object] | None = None,
keep_default_na: bool = True,
displayed_only: bool = True,
extract_links: Literal[None, "header", "footer", "body", "all"] = None,
extract_links: Literal["header", "footer", "body", "all"] | None = None,
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
storage_options: StorageOptions = None,
) -> list[DataFrame]:
Expand Down
20 changes: 5 additions & 15 deletions pandas/tests/frame/test_query_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,21 +160,13 @@ def test_query_empty_string(self):
df.query("")

def test_query_duplicate_column_name(self, engine, parser):
df = DataFrame(
{
"A": range(3),
"B": range(3),
"C": range(3)
}
).rename(columns={"B": "A"})
df = DataFrame({"A": range(3), "B": range(3), "C": range(3)}).rename(
columns={"B": "A"}
)

res = df.query("C == 1", engine=engine, parser=parser)

expect = DataFrame(
[[1, 1, 1]],
columns=["A", "A", "C"],
index=[1]
)
expect = DataFrame([[1, 1, 1]], columns=["A", "A", "C"], index=[1])

tm.assert_frame_equal(res, expect)

Expand Down Expand Up @@ -1140,9 +1132,7 @@ def test_query_with_nested_special_character(self, parser, engine):
[">=", operator.ge],
],
)
def test_query_lex_compare_strings(
self, parser, engine, op, func
):
def test_query_lex_compare_strings(self, parser, engine, op, func):
a = Series(np.random.default_rng(2).choice(list("abcde"), 20))
b = Series(np.arange(a.size))
df = DataFrame({"X": a, "Y": b})
Expand Down
Loading