Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas-stubs/_typing.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -740,7 +740,7 @@ ReplaceValue: TypeAlias = (
| Pattern
| NAType
| Sequence[Scalar | Pattern]
| Mapping[Hashable, Scalar]
| Mapping[HashableT, ScalarT]
| Series[Any]
| None
)
Expand Down
12 changes: 6 additions & 6 deletions pandas-stubs/core/frame.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -797,20 +797,20 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack):
@overload
def replace(
self,
to_replace: ReplaceValue | Mapping[Hashable, ReplaceValue] = ...,
value: ReplaceValue | Mapping[Hashable, ReplaceValue] = ...,
to_replace: ReplaceValue | Mapping[HashableT2, ReplaceValue] = ...,
value: ReplaceValue | Mapping[HashableT2, ReplaceValue] = ...,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be HashableT3, since it might not be the same type as HashableT2.

Same here and throughout

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've made the change, but I think if someone writes df.replace({'a': 1}, {0: 9}) then that's likely a user error. Though there's likely some valid usage I can't think of, so it's perhaps best to err on the side of avoiding false positives.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> I've made the change, but I think if someone writes df.replace({'a': 1}, {0: 9}) then that's likely a user error. Though there's likely some valid usage I can't think of, so it's perhaps best to err on the side of avoiding false positives.

Yes, I agree. While this is valid:

>>> import pandas as pd
>>> df = pd.DataFrame([["a", 1], ["b", 2]], columns=["ab", "num"])
>>> df
  ab  num
0  a    1
1  b    2
>>> df.replace({"a":"foo", 2: 20})
    ab  num
0  foo    1
1    b   20

it's not a great idea to be mixing types in one statement. With the stubs, I try to support "normal" usage of pandas. Otherwise, the stubs get too wide and don't catch common errors.

*,
inplace: Literal[True],
regex: ReplaceValue | Mapping[Hashable, ReplaceValue] = ...,
regex: ReplaceValue | Mapping[HashableT2, ReplaceValue] = ...,
) -> None: ...
@overload
def replace(
self,
to_replace: ReplaceValue | Mapping[Hashable, ReplaceValue] = ...,
value: ReplaceValue | Mapping[Hashable, ReplaceValue] = ...,
to_replace: ReplaceValue | Mapping[HashableT2, ReplaceValue] = ...,
value: ReplaceValue | Mapping[HashableT2, ReplaceValue] = ...,
*,
inplace: Literal[False] = ...,
regex: ReplaceValue | Mapping[Hashable, ReplaceValue] = ...,
regex: ReplaceValue | Mapping[HashableT2, ReplaceValue] = ...,
) -> Self: ...
def shift(
self,
Expand Down
6 changes: 5 additions & 1 deletion tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2574,8 +2574,10 @@ def test_types_replace() -> None:


def test_dataframe_replace() -> None:
df = pd.DataFrame({"col1": ["a", "ab", "ba"]})
df = pd.DataFrame({"col1": ["a", "ab", "ba"], "col2": [0, 1, 2]})
pattern = re.compile(r"^a.*")
replace_dict_scalar = {0: 1}
replace_dict_per_column = {"col2": {0: 1}}
check(assert_type(df.replace("a", "x"), pd.DataFrame), pd.DataFrame)
check(assert_type(df.replace(pattern, "x"), pd.DataFrame), pd.DataFrame)
check(assert_type(df.replace("a", "x", regex=True), pd.DataFrame), pd.DataFrame)
Expand All @@ -2592,6 +2594,7 @@ def test_dataframe_replace() -> None:
)

check(assert_type(df.replace({"a": "x"}), pd.DataFrame), pd.DataFrame)
check(assert_type(df.replace(replace_dict_scalar), pd.DataFrame), pd.DataFrame)
check(assert_type(df.replace({pattern: "x"}), pd.DataFrame), pd.DataFrame)
check(assert_type(df.replace(pd.Series({"a": "x"})), pd.DataFrame), pd.DataFrame)
check(assert_type(df.replace(regex={"a": "x"}), pd.DataFrame), pd.DataFrame)
Expand Down Expand Up @@ -2670,6 +2673,7 @@ def test_dataframe_replace() -> None:
)

check(assert_type(df.replace({"col1": {"a": "x"}}), pd.DataFrame), pd.DataFrame)
check(assert_type(df.replace(replace_dict_per_column), pd.DataFrame), pd.DataFrame)
check(assert_type(df.replace({"col1": {pattern: "x"}}), pd.DataFrame), pd.DataFrame)
check(
assert_type(df.replace({"col1": pd.Series({"a": "x"})}), pd.DataFrame),
Expand Down
5 changes: 5 additions & 0 deletions tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1448,12 +1448,17 @@ def test_types_replace() -> None:
def test_series_replace() -> None:
s: pd.Series[str] = pd.DataFrame({"col1": ["a", "ab", "ba"]})["col1"]
pattern = re.compile(r"^a.*")
replace_dict = {"a": "b"}
check(assert_type(s.replace("a", "x"), "pd.Series[str]"), pd.Series)
check(assert_type(s.replace(pattern, "x"), "pd.Series[str]"), pd.Series)
check(
assert_type(s.replace({"a": "z"}), "pd.Series[str]"),
pd.Series,
)
check(
assert_type(s.replace(replace_dict), "pd.Series[str]"),
pd.Series,
)
check(
assert_type(s.replace(pd.Series({"a": "z"})), "pd.Series[str]"),
pd.Series,
Expand Down