Skip to content

Commit fb1ef0e

Browse files
Merge remote-tracking branch 'upstream/main' into doc-whatsnew-2.3.1
2 parents d951581 + ebca3c5 commit fb1ef0e

File tree

6 files changed

+17
-16
lines changed

6 files changed

+17
-16
lines changed

doc/source/whatsnew/v2.3.1.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@ correctly, rather than defaulting to ``object`` dtype. For example:
6161
Bug fixes
6262
^^^^^^^^^
6363
- Bug in :meth:`.DataFrameGroupBy.min`, :meth:`.DataFrameGroupBy.max`, :meth:`.Resampler.min`, :meth:`.Resampler.max` where all NA values of string dtype would return float instead of string dtype (:issue:`60810`)
64+
- Bug in :meth:`DataFrame.join` incorrectly downcasting object-dtype indexes (:issue:`61771`)
6465
- Bug in :meth:`DataFrame.sum` with ``axis=1``, :meth:`.DataFrameGroupBy.sum` or :meth:`.SeriesGroupBy.sum` with ``skipna=True``, and :meth:`.Resampler.sum` with all NA values of :class:`StringDtype` resulted in ``0`` instead of the empty string ``""`` (:issue:`60229`)
6566
- Fixed bug in :meth:`DataFrame.explode` and :meth:`Series.explode` where methods would fail with ``dtype="str"`` (:issue:`61623`)
6667
- Fixed bug in unpickling objects pickled in pandas versions pre-2.3.0 that used :class:`StringDtype` (:issue:`61763`)

pandas/compat/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
pa_version_under18p0,
3636
pa_version_under19p0,
3737
pa_version_under20p0,
38+
pa_version_under21p0,
3839
)
3940

4041
if TYPE_CHECKING:
@@ -168,4 +169,5 @@ def is_ci_environment() -> bool:
168169
"pa_version_under18p0",
169170
"pa_version_under19p0",
170171
"pa_version_under20p0",
172+
"pa_version_under21p0",
171173
]

pandas/compat/pyarrow.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
pa_version_under18p0 = _palv < Version("18.0.0")
1919
pa_version_under19p0 = _palv < Version("19.0.0")
2020
pa_version_under20p0 = _palv < Version("20.0.0")
21+
pa_version_under21p0 = _palv < Version("21.0.0")
2122
HAS_PYARROW = _palv >= Version("12.0.1")
2223
except ImportError:
2324
pa_version_under12p1 = True
@@ -30,4 +31,5 @@
3031
pa_version_under18p0 = True
3132
pa_version_under19p0 = True
3233
pa_version_under20p0 = True
34+
pa_version_under21p0 = True
3335
HAS_PYARROW = False

pandas/core/reshape/merge.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1328,13 +1328,13 @@ def _maybe_add_join_keys(
13281328
# if we have an all missing left_indexer
13291329
# make sure to just use the right values or vice-versa
13301330
if left_indexer is not None and (left_indexer == -1).all():
1331-
key_col = Index(rvals)
1331+
key_col = Index(rvals, dtype=rvals.dtype, copy=False)
13321332
result_dtype = rvals.dtype
13331333
elif right_indexer is not None and (right_indexer == -1).all():
1334-
key_col = Index(lvals)
1334+
key_col = Index(lvals, dtype=lvals.dtype, copy=False)
13351335
result_dtype = lvals.dtype
13361336
else:
1337-
key_col = Index(lvals)
1337+
key_col = Index(lvals, dtype=lvals.dtype, copy=False)
13381338
if left_indexer is not None:
13391339
mask_left = left_indexer == -1
13401340
key_col = key_col.where(~mask_left, rvals)

pandas/tests/copy_view/test_functions.py

Lines changed: 4 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
6-
from pandas.compat import HAS_PYARROW
7-
84
from pandas import (
95
DataFrame,
106
Index,
@@ -247,13 +243,9 @@ def test_merge_copy_keyword():
247243
assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
248244

249245

250-
@pytest.mark.xfail(
251-
using_string_dtype() and HAS_PYARROW,
252-
reason="TODO(infer_string); result.index infers str dtype while both "
253-
"df1 and df2 index are object.",
254-
)
255-
def test_join_on_key():
256-
df_index = Index(["a", "b", "c"], name="key", dtype=object)
246+
@pytest.mark.parametrize("dtype", [object, "str"])
247+
def test_join_on_key(dtype):
248+
df_index = Index(["a", "b", "c"], name="key", dtype=dtype)
257249

258250
df1 = DataFrame({"a": [1, 2, 3]}, index=df_index.copy(deep=True))
259251
df2 = DataFrame({"b": [4, 5, 6]}, index=df_index.copy(deep=True))
@@ -265,7 +257,7 @@ def test_join_on_key():
265257

266258
assert np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
267259
assert np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
268-
assert np.shares_memory(get_array(result.index), get_array(df1.index))
260+
assert tm.shares_memory(get_array(result.index), get_array(df1.index))
269261
assert not np.shares_memory(get_array(result.index), get_array(df2.index))
270262

271263
result.iloc[0, 0] = 0

pandas/tests/extension/test_arrow.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@
4343
pa_version_under14p0,
4444
pa_version_under19p0,
4545
pa_version_under20p0,
46+
pa_version_under21p0,
4647
)
4748

4849
from pandas.core.dtypes.dtypes import (
@@ -542,7 +543,10 @@ def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
542543
else:
543544
cmp_dtype = arr.dtype
544545
elif arr.dtype.name == "decimal128(7, 3)[pyarrow]":
545-
if op_name not in ["median", "var", "std", "sem", "skew"]:
546+
if op_name == "sum" and not pa_version_under21p0:
547+
# https://github.com/apache/arrow/pull/44184
548+
cmp_dtype = ArrowDtype(pa.decimal128(38, 3))
549+
elif op_name not in ["median", "var", "std", "sem", "skew"]:
546550
cmp_dtype = arr.dtype
547551
else:
548552
cmp_dtype = "float64[pyarrow]"

0 commit comments

Comments
 (0)