diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 74f95cc7f52b4..cd3e2640d8e92 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -2307,8 +2307,17 @@ def kind(self) -> str:
     @cache_readonly
     def itemsize(self) -> int:
         """Return the number of bytes in this dtype"""
+        # ``bit_width`` raises ValueError rather than AttributeError on
+        # variable-width pyarrow types, so ``hasattr`` cannot guard it.
+        try:
+            bit_width = self.pyarrow_dtype.bit_width
+        except (AttributeError, ValueError):
+            bit_width = 0
+        # Sub-byte widths (bool_ is 1 bit) must not floor-divide to 0 bytes.
+        if bit_width >= 8:
+            return bit_width // 8
         return self.numpy_dtype.itemsize
 
     def construct_array_type(self) -> type_t[ArrowExtensionArray]:
         """
         Return the array type associated with this dtype.
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
index 38c84e45c6fe8..e32a03208d733 100644
--- a/pandas/tests/dtypes/test_dtypes.py
+++ b/pandas/tests/dtypes/test_dtypes.py
@@ -1256,3 +1256,34 @@ def test_categorical_nan_no_dtype_conversion():
     expected = pd.DataFrame({"a": Categorical([1], [1]), "b": [1]})
     df.loc[0, "a"] = np.array([1])
     tm.assert_frame_equal(df, expected)
+
+
+class TestArrowDtype:
+    @pytest.mark.parametrize(
+        "factory_name, factory_args, expected_itemsize",
+        [
+            pytest.param("date32", (), 4, id="date32"),
+            pytest.param("date64", (), 8, id="date64"),
+            pytest.param("time32", ("s",), 4, id="time32_s"),
+            pytest.param("time64", ("us",), 8, id="time64_us"),
+            pytest.param("int32", (), 4, id="int32"),
+            pytest.param("int64", (), 8, id="int64"),
+        ],
+    )
+    def test_itemsize_with_bit_width(
+        self, factory_name, factory_args, expected_itemsize
+    ):
+        """Test that ArrowDtype.itemsize correctly uses bit_width when available."""
+        # Resolve the type lazily so collection works without pyarrow installed.
+        pa = pytest.importorskip("pyarrow", minversion="12.0.1")
+        pa_dtype = getattr(pa, factory_name)(*factory_args)
+        assert pd.ArrowDtype(pa_dtype).itemsize == expected_itemsize
+
+    @pytest.mark.parametrize("factory_name", ["string", "bool_"])
+    def test_itemsize_fallback_to_numpy(self, factory_name):
+        """Test itemsize falls back to numpy_dtype when bit_width is unusable."""
+        # string has no fixed bit width and bool_ is narrower than one byte, so
+        # both must report exactly what the equivalent numpy dtype reports.
+        pa = pytest.importorskip("pyarrow", minversion="12.0.1")
+        dtype = pd.ArrowDtype(getattr(pa, factory_name)())
+        assert dtype.itemsize == dtype.numpy_dtype.itemsize