Skip to content

Commit 9731c60

Browse files
Merge remote-tracking branch 'upstream/2.3.x' into depr/array-copy-false-futurewarning
2 parents f9fc8e8 + e53967b commit 9731c60

File tree

19 files changed: 125 additions (+125) and 133 deletions (-133)

pandas/core/array_algos/replace.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -149,4 +149,6 @@ def re_replacer(s):
149149
if mask is None:
150150
values[:] = f(values)
151151
else:
152+
if values.ndim != mask.ndim:
153+
mask = np.broadcast_to(mask, values.shape)
152154
values[mask] = f(values[mask])

pandas/core/internals/blocks.py

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -929,7 +929,7 @@ def replace(
929929
blocks = blk.convert(
930930
copy=False,
931931
using_cow=using_cow,
932-
convert_string=convert_string or self.dtype != _dtype_obj,
932+
convert_string=convert_string or self.dtype == "string",
933933
)
934934
if len(blocks) > 1 or blocks[0].dtype != blk.dtype:
935935
warnings.warn(
@@ -987,7 +987,7 @@ def _replace_regex(
987987
inplace: bool = False,
988988
mask=None,
989989
using_cow: bool = False,
990-
convert_string: bool = True,
990+
convert_string=None,
991991
already_warned=None,
992992
) -> list[Block]:
993993
"""
@@ -1048,10 +1048,18 @@ def _replace_regex(
10481048
already_warned.warned_already = True
10491049

10501050
nbs = block.convert(
1051-
copy=False, using_cow=using_cow, convert_string=convert_string
1051+
copy=False,
1052+
using_cow=using_cow,
1053+
convert_string=convert_string or self.dtype == "string",
10521054
)
10531055
opt = get_option("future.no_silent_downcasting")
1054-
if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt:
1056+
if (
1057+
len(nbs) > 1
1058+
or (
1059+
nbs[0].dtype != block.dtype
1060+
and not (self.dtype == "string" and nbs[0].dtype == "string")
1061+
)
1062+
) and not opt:
10551063
warnings.warn(
10561064
# GH#54710
10571065
"Downcasting behavior in `replace` is deprecated and "
@@ -1088,7 +1096,7 @@ def replace_list(
10881096
values._replace(to_replace=src_list, value=dest_list, inplace=True)
10891097
return [blk]
10901098

1091-
convert_string = self.dtype != _dtype_obj
1099+
convert_string = self.dtype == "string"
10921100

10931101
# Exclude anything that we know we won't contain
10941102
pairs = [
@@ -2167,6 +2175,13 @@ def where(
21672175
if isinstance(self.dtype, (IntervalDtype, StringDtype)):
21682176
# TestSetitemFloatIntervalWithIntIntervalValues
21692177
blk = self.coerce_to_target_dtype(orig_other)
2178+
if (
2179+
self.ndim == 2
2180+
and isinstance(orig_cond, np.ndarray)
2181+
and orig_cond.ndim == 1
2182+
and not is_1d_only_ea_dtype(blk.dtype)
2183+
):
2184+
orig_cond = orig_cond[:, None]
21702185
nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
21712186
return self._maybe_downcast(
21722187
nbs, downcast=_downcast, using_cow=using_cow, caller="where"

pandas/io/formats/style.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1580,7 +1580,7 @@ def _update_ctx_header(self, attrs: DataFrame, axis: AxisInt) -> None:
15801580
for j in attrs.columns:
15811581
ser = attrs[j]
15821582
for i, c in ser.items():
1583-
if not c:
1583+
if not c or pd.isna(c):
15841584
continue
15851585
css_list = maybe_convert_css_to_tuples(c)
15861586
if axis == 0:

pandas/io/pytables.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5274,6 +5274,8 @@ def _dtype_to_kind(dtype_str: str) -> str:
52745274
kind = "integer"
52755275
elif dtype_str == "object":
52765276
kind = "object"
5277+
elif dtype_str == "str":
5278+
kind = "str"
52775279
else:
52785280
raise ValueError(f"cannot interpret dtype of [{dtype_str}]")
52795281

pandas/tests/base/test_conversion.py

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas.compat import HAS_PYARROW
75
from pandas.compat.numpy import np_version_gt2
86

@@ -391,9 +389,6 @@ def test_to_numpy(arr, expected, zero_copy, index_or_series_or_array):
391389
assert np.may_share_memory(result_nocopy1, result_nocopy2)
392390

393391

394-
@pytest.mark.xfail(
395-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
396-
)
397392
@pytest.mark.parametrize("as_series", [True, False])
398393
@pytest.mark.parametrize(
399394
"arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)]
@@ -405,13 +400,13 @@ def test_to_numpy_copy(arr, as_series, using_infer_string):
405400

406401
# no copy by default
407402
result = obj.to_numpy()
408-
if using_infer_string and arr.dtype == object:
403+
if using_infer_string and arr.dtype == object and obj.dtype.storage == "pyarrow":
409404
assert np.shares_memory(arr, result) is False
410405
else:
411406
assert np.shares_memory(arr, result) is True
412407

413408
result = obj.to_numpy(copy=False)
414-
if using_infer_string and arr.dtype == object:
409+
if using_infer_string and arr.dtype == object and obj.dtype.storage == "pyarrow":
415410
assert np.shares_memory(arr, result) is False
416411
else:
417412
assert np.shares_memory(arr, result) is True

pandas/tests/frame/methods/test_cov_corr.py

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
import pandas.util._test_decorators as td
75

86
import pandas as pd
@@ -328,7 +326,6 @@ def test_corrwith(self, datetime_frame, dtype):
328326
for row in index[:4]:
329327
tm.assert_almost_equal(correls[row], df1.loc[row].corr(df2.loc[row]))
330328

331-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
332329
def test_corrwith_with_objects(self, using_infer_string):
333330
df1 = DataFrame(
334331
np.random.default_rng(2).standard_normal((10, 4)),
@@ -342,9 +339,8 @@ def test_corrwith_with_objects(self, using_infer_string):
342339
df2["obj"] = "bar"
343340

344341
if using_infer_string:
345-
import pyarrow as pa
346-
347-
with pytest.raises(pa.lib.ArrowNotImplementedError, match="has no kernel"):
342+
msg = "Cannot perform reduction 'mean' with string dtype"
343+
with pytest.raises(TypeError, match=msg):
348344
df1.corrwith(df2)
349345
else:
350346
with pytest.raises(TypeError, match="Could not convert"):

pandas/tests/frame/methods/test_dtypes.py

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
from pandas.core.dtypes.dtypes import DatetimeTZDtype
97

108
import pandas as pd
@@ -144,13 +142,9 @@ def test_dtypes_timedeltas(self):
144142
)
145143
tm.assert_series_equal(result, expected)
146144

147-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
148145
def test_frame_apply_np_array_return_type(self, using_infer_string):
149146
# GH 35517
150147
df = DataFrame([["foo"]])
151148
result = df.apply(lambda col: np.array("bar"))
152-
if using_infer_string:
153-
expected = Series([np.array(["bar"])])
154-
else:
155-
expected = Series(["bar"])
149+
expected = Series(np.array("bar"))
156150
tm.assert_series_equal(result, expected)

pandas/tests/frame/methods/test_fillna.py

Lines changed: 6 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
import pandas.util._test_decorators as td
75

86
from pandas import (
@@ -91,8 +89,6 @@ def test_fillna_datetime(self, datetime_frame):
9189
with pytest.raises(ValueError, match=msg):
9290
datetime_frame.fillna(5, method="ffill")
9391

94-
# TODO(infer_string) test as actual error instead of xfail
95-
@pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
9692
def test_fillna_mixed_type(self, float_string_frame):
9793
mf = float_string_frame
9894
mf.loc[mf.index[5:20], "foo"] = np.nan
@@ -126,7 +122,7 @@ def test_fillna_empty(self, using_copy_on_write):
126122
df.x.fillna(method=m, inplace=True)
127123
df.x.fillna(method=m)
128124

129-
def test_fillna_different_dtype(self, using_infer_string):
125+
def test_fillna_different_dtype(self):
130126
# with different dtype (GH#3386)
131127
df = DataFrame(
132128
[["a", "a", np.nan, "a"], ["b", "b", np.nan, "b"], ["c", "c", np.nan, "c"]]
@@ -136,6 +132,7 @@ def test_fillna_different_dtype(self, using_infer_string):
136132
expected = DataFrame(
137133
[["a", "a", "foo", "a"], ["b", "b", "foo", "b"], ["c", "c", "foo", "c"]]
138134
)
135+
# column is originally float (all-NaN) -> filling with string gives object dtype
139136
expected[2] = expected[2].astype("object")
140137
tm.assert_frame_equal(result, expected)
141138

@@ -654,18 +651,10 @@ def test_fillna_col_reordering(self):
654651
filled = df.fillna(method="ffill")
655652
assert df.columns.tolist() == filled.columns.tolist()
656653

657-
# TODO(infer_string) test as actual error instead of xfail
658-
@pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
659-
def test_fill_corner(self, float_frame, float_string_frame):
660-
mf = float_string_frame
661-
mf.loc[mf.index[5:20], "foo"] = np.nan
662-
mf.loc[mf.index[-10:], "A"] = np.nan
663-
664-
filled = float_string_frame.fillna(value=0)
665-
assert (filled.loc[filled.index[5:20], "foo"] == 0).all()
666-
del float_string_frame["foo"]
667-
668-
float_frame.reindex(columns=[]).fillna(value=0)
654+
def test_fill_empty(self, float_frame):
655+
df = float_frame.reindex(columns=[])
656+
result = df.fillna(value=0)
657+
tm.assert_frame_equal(result, df)
669658

670659
def test_fillna_downcast_dict(self):
671660
# GH#40809

0 commit comments

Comments
 (0)