From 4d343fbca42523a37c91f4bf3ce60099c23d1a47 Mon Sep 17 00:00:00 2001
From: george-adams1
Date: Sun, 24 Aug 2025 15:32:16 -0400
Subject: [PATCH] fixes ArrowDtype.itemsize for fixed-width types

---
 pandas/core/dtypes/dtypes.py         | 12 +++++++++++-
 pandas/tests/extension/test_arrow.py | 26 ++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index eb5c7739e5132..c2d37d53604e5 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -2307,7 +2307,17 @@ def kind(self) -> str:
     @cache_readonly
     def itemsize(self) -> int:
         """Return the number of bytes in this dtype"""
-        return self.numpy_dtype.itemsize
+        try:
+            # Use PyArrow's bit_width for fixed-width types
+            bit_width = self.pyarrow_dtype.bit_width
+        except (AttributeError, NotImplementedError, ValueError):
+            # Fall back to numpy dtype for variable-width or unsupported types
+            return self.numpy_dtype.itemsize
+        if bit_width % 8:
+            # Sub-byte widths (bit-packed bool has bit_width == 1) would floor
+            # to 0 bytes, so report the numpy dtype's size instead
+            return self.numpy_dtype.itemsize
+        return bit_width // 8  # convert from bits to bytes
 
     def construct_array_type(self) -> type_t[ArrowExtensionArray]:
         """
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 4c0ced8b56288..1f07cf72ff040 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -3576,6 +3576,32 @@ def test_timestamp_dtype_disallows_decimal():
         pd.array(vals, dtype=ArrowDtype(pa.timestamp("us")))
 
 
+def test_arrow_dtype_itemsize():
+    # Regression test for GH#57948 where date32[day] was incorrectly
+    # reporting 8 bytes instead of 4.
+
+    # date32 should be 4 bytes, not 8
+    dtype = ArrowDtype(pa.date32())
+    assert dtype.itemsize == 4
+
+    # Testing other fixed-width types
+    assert ArrowDtype(pa.int32()).itemsize == 4
+    assert ArrowDtype(pa.int64()).itemsize == 8
+    assert ArrowDtype(pa.float32()).itemsize == 4
+    assert ArrowDtype(pa.float64()).itemsize == 8
+    assert ArrowDtype(pa.date64()).itemsize == 8
+
+    # bool is bit-packed (bit_width == 1) and must not report 0 bytes
+    assert ArrowDtype(pa.bool_()).itemsize == 1
+
+    # Test that variable-width types fall back gracefully
+    string_dtype = ArrowDtype(pa.string())
+    assert isinstance(string_dtype.itemsize, int)
+
+    list_dtype = ArrowDtype(pa.list_(pa.int32()))
+    assert isinstance(list_dtype.itemsize, int)
+
+
 def test_timestamp_dtype_matches_to_datetime():
     # GH#61775
     dtype1 = "datetime64[ns, US/Eastern]"