diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4d9a45abe17cd..6892f50b86990 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -782,6 +782,7 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ +- Bug in :class:`Categorical` when constructing with an :class:`Index` with :class:`ArrowDtype` (:issue:`60563`) - Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`) - Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`) - Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`59505`) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index ae20bfb6b284b..0ce700772fdcc 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -447,7 +447,12 @@ def __init__( if isinstance(values.dtype, ArrowDtype) and issubclass( values.dtype.type, CategoricalDtypeType ): - arr = values._pa_array.combine_chunks() + from pandas import Index + + if isinstance(values, Index): + arr = values._data._pa_array.combine_chunks() + else: + arr = values._pa_array.combine_chunks() categories = arr.dictionary.to_pandas(types_mapper=ArrowDtype) codes = arr.indices.to_numpy() dtype = CategoricalDtype(categories, values.dtype.pyarrow_dtype.ordered) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index d6f428f4938a6..f4a63ff4c92ec 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3511,3 +3511,20 @@ def test_map_numeric_na_action(): result = ser.map(lambda x: 42, na_action="ignore") expected = pd.Series([42.0, 42.0, np.nan], dtype="float64") 
tm.assert_series_equal(result, expected)
+
+
+def test_categorical_from_arrow_dictionary():
+    # GH 60563: Categorical built from an Index with a dictionary ArrowDtype
+    df = pd.DataFrame(
+        {"A": ["a1", "a2"]}, dtype=ArrowDtype(pa.dictionary(pa.int32(), pa.utf8()))
+    )
+    result = df.value_counts(dropna=False)
+    expected = pd.Series(  # index level is plain pa.string(), not dictionary
+        [1, 1],
+        index=pd.MultiIndex.from_arrays(
+            [pd.Index(["a1", "a2"], dtype=ArrowDtype(pa.string()), name="A")]
+        ),
+        name="count",
+        dtype="int64",
+    )
+    tm.assert_series_equal(result, expected)