Skip to content

Commit ee08fd5

Browse files
authored
Merge branch 'main' into bugfix-spss-kwargs
2 parents 94800db + 46163c5 commit ee08fd5

File tree

9 files changed: +60 -7 lines changed

doc/source/whatsnew/v2.2.1.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ Fixed regressions
1616
- Fixed memory leak in :func:`read_csv` (:issue:`57039`)
1717
- Fixed performance regression in :meth:`Series.combine_first` (:issue:`55845`)
1818
- Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
19+
- Fixed regression in :func:`wide_to_long` raising an ``AttributeError`` for string columns (:issue:`57066`)
1920
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
2021
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)
2122
- Fixed regression in :meth:`Series.pct_change` raising a ``ValueError`` for an empty :class:`Series` (:issue:`57056`)

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -142,7 +142,7 @@ Timezones
142142

143143
Numeric
144144
^^^^^^^
145-
-
145+
- Bug in ``np.matmul`` with :class:`Index` inputs raising a ``TypeError`` (:issue:`57079`)
146146
-
147147

148148
Conversion
@@ -152,7 +152,7 @@ Conversion
152152

153153
Strings
154154
^^^^^^^
155-
-
155+
- Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`)
156156
-
157157

158158
Interval

pandas/core/arrays/string_.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -542,7 +542,7 @@ def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
542542
def value_counts(self, dropna: bool = True) -> Series:
543543
from pandas.core.algorithms import value_counts_internal as value_counts
544544

545-
result = value_counts(self._ndarray, dropna=dropna).astype("Int64")
545+
result = value_counts(self._ndarray, sort=False, dropna=dropna).astype("Int64")
546546
result.index = result.index.astype(self.dtype)
547547
return result
548548

pandas/core/indexes/base.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -951,6 +951,9 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
951951
elif method == "reduce":
952952
result = lib.item_from_zerodim(result)
953953
return result
954+
elif is_scalar(result):
955+
# e.g. matmul
956+
return result
954957

955958
if result.dtype == np.float16:
956959
result = result.astype(np.float32)

pandas/core/reshape/melt.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -458,8 +458,7 @@ def wide_to_long(
458458

459459
def get_var_names(df, stub: str, sep: str, suffix: str):
460460
regex = rf"^{re.escape(stub)}{re.escape(sep)}{suffix}$"
461-
pattern = re.compile(regex)
462-
return df.columns[df.columns.str.match(pattern)]
461+
return df.columns[df.columns.str.match(regex)]
463462

464463
def melt_stub(df, stub: str, i, j, value_vars, sep: str):
465464
newdf = melt(

pandas/core/strings/accessor.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1332,14 +1332,14 @@ def contains(
13321332
return self._wrap_result(result, fill_value=na, returns_string=False)
13331333

13341334
@forbid_nonstring_types(["bytes"])
1335-
def match(self, pat, case: bool = True, flags: int = 0, na=None):
1335+
def match(self, pat: str, case: bool = True, flags: int = 0, na=None):
13361336
"""
13371337
Determine if each string starts with a match of a regular expression.
13381338
13391339
Parameters
13401340
----------
13411341
pat : str
1342-
Character sequence or regular expression.
1342+
Character sequence.
13431343
case : bool, default True
13441344
If True, case sensitive.
13451345
flags : int, default 0 (no flags)

pandas/tests/arrays/string_/test_string.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -584,6 +584,19 @@ def test_value_counts_with_normalize(dtype):
584584
tm.assert_series_equal(result, expected)
585585

586586

587+
def test_value_counts_sort_false(dtype):
588+
if getattr(dtype, "storage", "") == "pyarrow":
589+
exp_dtype = "int64[pyarrow]"
590+
elif getattr(dtype, "storage", "") == "pyarrow_numpy":
591+
exp_dtype = "int64"
592+
else:
593+
exp_dtype = "Int64"
594+
ser = pd.Series(["a", "b", "c", "b"], dtype=dtype)
595+
result = ser.value_counts(sort=False)
596+
expected = pd.Series([1, 2, 1], index=ser[:3], dtype=exp_dtype, name="count")
597+
tm.assert_series_equal(result, expected)
598+
599+
587600
@pytest.mark.parametrize(
588601
"values, expected",
589602
[

pandas/tests/reshape/test_melt.py

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1217,3 +1217,33 @@ def test_missing_stubname(self, dtype):
12171217
new_level = expected.index.levels[0].astype(dtype)
12181218
expected.index = expected.index.set_levels(new_level, level=0)
12191219
tm.assert_frame_equal(result, expected)
1220+
1221+
1222+
def test_wide_to_long_pyarrow_string_columns():
1223+
# GH 57066
1224+
pytest.importorskip("pyarrow")
1225+
df = DataFrame(
1226+
{
1227+
"ID": {0: 1},
1228+
"R_test1": {0: 1},
1229+
"R_test2": {0: 1},
1230+
"R_test3": {0: 2},
1231+
"D": {0: 1},
1232+
}
1233+
)
1234+
df.columns = df.columns.astype("string[pyarrow_numpy]")
1235+
result = wide_to_long(
1236+
df, stubnames="R", i="ID", j="UNPIVOTED", sep="_", suffix=".*"
1237+
)
1238+
expected = DataFrame(
1239+
[[1, 1], [1, 1], [1, 2]],
1240+
columns=Index(["D", "R"], dtype=object),
1241+
index=pd.MultiIndex.from_arrays(
1242+
[
1243+
[1, 1, 1],
1244+
Index(["test1", "test2", "test3"], dtype="string[pyarrow_numpy]"),
1245+
],
1246+
names=["ID", "UNPIVOTED"],
1247+
),
1248+
)
1249+
tm.assert_frame_equal(result, expected)

pandas/tests/series/test_ufunc.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -427,6 +427,13 @@ def test_np_matmul():
427427
tm.assert_frame_equal(expected, result)
428428

429429

430+
@pytest.mark.parametrize("box", [pd.Index, pd.Series])
431+
def test_np_matmul_1D(box):
432+
result = np.matmul(box([1, 2]), box([2, 3]))
433+
assert result == 8
434+
assert isinstance(result, np.int64)
435+
436+
430437
def test_array_ufuncs_for_many_arguments():
431438
# GH39853
432439
def add3(x, y, z):

0 commit comments

Comments
 (0)