Skip to content

Commit dd0a6a2

Browse files
still raise a warning when it would cast from numeric to string
1 parent d95620b commit dd0a6a2

File tree

5 files changed

+38 additions, -57 deletions (lines changed)

pandas/core/internals/blocks.py

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -643,7 +643,10 @@ def convert(
643643

644644
if self.ndim != 1 and self.shape[0] != 1:
645645
blocks = self.split_and_operate(
646-
Block.convert, copy=copy, using_cow=using_cow
646+
Block.convert,
647+
copy=copy,
648+
using_cow=using_cow,
649+
convert_string=convert_string,
647650
)
648651
if all(blk.dtype.kind == "O" for blk in blocks):
649652
# Avoid fragmenting the block if convert is a no-op
@@ -847,6 +850,7 @@ def replace(
847850
mask: npt.NDArray[np.bool_] | None = None,
848851
using_cow: bool = False,
849852
already_warned=None,
853+
convert_string=None,
850854
) -> list[Block]:
851855
"""
852856
replace the to_replace value with value, possible to create new
@@ -912,7 +916,9 @@ def replace(
912916
blocks = [blk]
913917
else:
914918
blocks = blk.convert(
915-
copy=False, using_cow=using_cow, convert_string=False
919+
copy=False,
920+
using_cow=using_cow,
921+
convert_string=convert_string or self.dtype != _dtype_obj,
916922
)
917923
if len(blocks) > 1 or blocks[0].dtype != blk.dtype:
918924
warnings.warn(
@@ -941,6 +947,7 @@ def replace(
941947
value=value,
942948
inplace=True,
943949
mask=mask,
950+
convert_string=convert_string,
944951
)
945952

946953
else:
@@ -955,6 +962,7 @@ def replace(
955962
inplace=True,
956963
mask=mask[i : i + 1],
957964
using_cow=using_cow,
965+
convert_string=convert_string,
958966
)
959967
)
960968
return blocks
@@ -1016,7 +1024,9 @@ def _replace_regex(
10161024
)
10171025
already_warned.warned_already = True
10181026

1019-
nbs = block.convert(copy=False, using_cow=using_cow, convert_string=False)
1027+
nbs = block.convert(
1028+
copy=False, using_cow=using_cow, convert_string=self.dtype != _dtype_obj
1029+
)
10201030
opt = get_option("future.no_silent_downcasting")
10211031
if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt:
10221032
warnings.warn(
@@ -1047,6 +1057,8 @@ def replace_list(
10471057
"""
10481058
values = self.values
10491059

1060+
convert_string = self.dtype != _dtype_obj
1061+
10501062
if isinstance(values, Categorical):
10511063
# TODO: avoid special-casing
10521064
# GH49404
@@ -1137,6 +1149,7 @@ def replace_list(
11371149
inplace=inplace,
11381150
regex=regex,
11391151
using_cow=using_cow,
1152+
convert_string=convert_string,
11401153
)
11411154

11421155
if using_cow and i != src_len:
@@ -1161,7 +1174,7 @@ def replace_list(
11611174
converted = res_blk.convert(
11621175
copy=True and not using_cow,
11631176
using_cow=using_cow,
1164-
convert_string=False,
1177+
convert_string=convert_string,
11651178
)
11661179
if len(converted) > 1 or converted[0].dtype != res_blk.dtype:
11671180
warnings.warn(
@@ -1191,6 +1204,7 @@ def _replace_coerce(
11911204
inplace: bool = True,
11921205
regex: bool = False,
11931206
using_cow: bool = False,
1207+
convert_string: bool = True,
11941208
) -> list[Block]:
11951209
"""
11961210
Replace value corresponding to the given boolean array with another
@@ -1243,6 +1257,7 @@ def _replace_coerce(
12431257
inplace=inplace,
12441258
mask=mask,
12451259
using_cow=using_cow,
1260+
convert_string=convert_string,
12461261
)
12471262

12481263
# ---------------------------------------------------------------------

pandas/tests/frame/methods/test_fillna.py

Lines changed: 5 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -132,21 +132,14 @@ def test_fillna_different_dtype(self, using_infer_string):
132132
[["a", "a", np.nan, "a"], ["b", "b", np.nan, "b"], ["c", "c", np.nan, "c"]]
133133
)
134134

135-
if using_infer_string:
136-
with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
137-
result = df.fillna({2: "foo"})
138-
else:
139-
result = df.fillna({2: "foo"})
135+
result = df.fillna({2: "foo"})
140136
expected = DataFrame(
141137
[["a", "a", "foo", "a"], ["b", "b", "foo", "b"], ["c", "c", "foo", "c"]]
142138
)
139+
expected[2] = expected[2].astype("object")
143140
tm.assert_frame_equal(result, expected)
144141

145-
if using_infer_string:
146-
with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
147-
return_value = df.fillna({2: "foo"}, inplace=True)
148-
else:
149-
return_value = df.fillna({2: "foo"}, inplace=True)
142+
return_value = df.fillna({2: "foo"}, inplace=True)
150143
tm.assert_frame_equal(df, expected)
151144
assert return_value is None
152145

@@ -385,12 +378,8 @@ def test_fillna_dtype_conversion(self, using_infer_string):
385378

386379
# empty block
387380
df = DataFrame(index=range(3), columns=["A", "B"], dtype="float64")
388-
if using_infer_string:
389-
with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
390-
result = df.fillna("nan")
391-
else:
392-
result = df.fillna("nan")
393-
expected = DataFrame("nan", index=range(3), columns=["A", "B"])
381+
result = df.fillna("nan")
382+
expected = DataFrame("nan", index=range(3), columns=["A", "B"], dtype=object)
394383
tm.assert_frame_equal(result, expected)
395384

396385
@pytest.mark.parametrize("val", ["", 1, np.nan, 1.0])

pandas/tests/frame/methods/test_replace.py

Lines changed: 5 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -281,20 +281,12 @@ def test_regex_replace_dict_nested(self, mix_abc):
281281
tm.assert_frame_equal(res3, expec)
282282
tm.assert_frame_equal(res4, expec)
283283

284-
def test_regex_replace_dict_nested_non_first_character(
285-
self, any_string_dtype, using_infer_string
286-
):
284+
def test_regex_replace_dict_nested_non_first_character(self, any_string_dtype):
287285
# GH 25259
288286
dtype = any_string_dtype
289287
df = DataFrame({"first": ["abc", "bca", "cab"]}, dtype=dtype)
290-
if using_infer_string and any_string_dtype == "object":
291-
with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
292-
result = df.replace({"a": "."}, regex=True)
293-
expected = DataFrame({"first": [".bc", "bc.", "c.b"]})
294-
295-
else:
296-
result = df.replace({"a": "."}, regex=True)
297-
expected = DataFrame({"first": [".bc", "bc.", "c.b"]}, dtype=dtype)
288+
result = df.replace({"a": "."}, regex=True)
289+
expected = DataFrame({"first": [".bc", "bc.", "c.b"]}, dtype=dtype)
298290
tm.assert_frame_equal(result, expected)
299291

300292
@pytest.mark.xfail(using_string_dtype(), reason="can't set float into string")
@@ -430,31 +422,12 @@ def test_replace_regex_metachar(self, metachar):
430422
],
431423
)
432424
def test_regex_replace_string_types(
433-
self,
434-
data,
435-
to_replace,
436-
expected,
437-
frame_or_series,
438-
any_string_dtype,
439-
using_infer_string,
440-
request,
425+
self, data, to_replace, expected, frame_or_series, any_string_dtype
441426
):
442427
# GH-41333, GH-35977
443428
dtype = any_string_dtype
444429
obj = frame_or_series(data, dtype=dtype)
445-
if using_infer_string and any_string_dtype == "object":
446-
if len(to_replace) > 1 and isinstance(obj, DataFrame):
447-
request.node.add_marker(
448-
pytest.mark.xfail(
449-
reason="object input array that gets downcasted raises on "
450-
"second pass"
451-
)
452-
)
453-
with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
454-
result = obj.replace(to_replace, regex=True)
455-
dtype = "str"
456-
else:
457-
result = obj.replace(to_replace, regex=True)
430+
result = obj.replace(to_replace, regex=True)
458431
expected = frame_or_series(expected, dtype=dtype)
459432

460433
tm.assert_equal(result, expected)

pandas/tests/indexing/test_coercion.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -831,7 +831,7 @@ def replacer(self, how, from_key, to_key):
831831
raise ValueError
832832
return replacer
833833

834-
def test_replace_series(self, how, to_key, from_key, replacer):
834+
def test_replace_series(self, how, to_key, from_key, replacer, using_infer_string):
835835
index = pd.Index([3, 4], name="xxx")
836836
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
837837
obj = obj.astype(from_key)
@@ -856,6 +856,10 @@ def test_replace_series(self, how, to_key, from_key, replacer):
856856
else:
857857
exp = pd.Series(self.rep[to_key], index=index, name="yyy")
858858

859+
if using_infer_string and exp.dtype == "string" and obj.dtype == object:
860+
# with infer_string, we disable the deprecated downcasting behavior
861+
exp = exp.astype(object)
862+
859863
msg = "Downcasting behavior in `replace`"
860864
warn = FutureWarning
861865
if (

pandas/tests/series/methods/test_replace.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -764,14 +764,14 @@ def test_replace_value_none_dtype_numeric(self, val):
764764
def test_replace_change_dtype_series(self, using_infer_string):
765765
# GH#25797
766766
df = pd.DataFrame.from_dict({"Test": ["0.5", True, "0.6"]})
767-
warn = FutureWarning if using_infer_string else None
768-
with tm.assert_produces_warning(warn, match="Downcasting"):
769-
df["Test"] = df["Test"].replace([True], [np.nan])
770-
expected = pd.DataFrame.from_dict({"Test": ["0.5", np.nan, "0.6"]})
767+
df["Test"] = df["Test"].replace([True], [np.nan])
768+
expected = pd.DataFrame({"Test": ["0.5", np.nan, "0.6"]}, dtype=object)
771769
tm.assert_frame_equal(df, expected)
772770

773771
df = pd.DataFrame.from_dict({"Test": ["0.5", None, "0.6"]})
774772
df["Test"] = df["Test"].replace([None], [np.nan])
773+
if using_infer_string:
774+
expected = expected.astype("str")
775775
tm.assert_frame_equal(df, expected)
776776

777777
df = pd.DataFrame.from_dict({"Test": ["0.5", None, "0.6"]})

0 commit comments

Comments
 (0)