Skip to content

Commit ad0ec21

Browse files
iabhi4 and mroeschke authored
BUG: Fix Series.str.zfill for ArrowDtype string arrays #61485 (#61533)
Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 820982b commit ad0ec21

File tree

6 files changed

+32
-3
lines changed

6 files changed

+32
-3
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1008,8 +1008,8 @@ Conversion
10081008

10091009
Strings
10101010
^^^^^^^
1011+
- Bug in :meth:`Series.str.zfill` raising ``AttributeError`` for :class:`ArrowDtype` (:issue:`61485`)
10111012
- Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`)
1012-
-
10131013

10141014
Interval
10151015
^^^^^^^^

pandas/core/arrays/arrow/array.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2808,6 +2808,13 @@ def _str_wrap(self, width: int, **kwargs) -> Self:
28082808
result = self._apply_elementwise(predicate)
28092809
return self._from_pyarrow_array(pa.chunked_array(result))
28102810

2811+
def _str_zfill(self, width: int) -> Self:
2812+
# TODO: Replace with pc.utf8_zfill when supported by arrow
2813+
# Arrow ENH - https://github.com/apache/arrow/issues/46683
2814+
predicate = lambda val: val.zfill(width)
2815+
result = self._apply_elementwise(predicate)
2816+
return type(self)(pa.chunked_array(result))
2817+
28112818
@property
28122819
def _dt_days(self) -> Self:
28132820
return self._from_pyarrow_array(

pandas/core/arrays/string_.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1134,3 +1134,6 @@ def _cmp_method(self, other, op):
11341134
return res_arr
11351135

11361136
_arith_method = _cmp_method
1137+
1138+
def _str_zfill(self, width: int) -> Self:
1139+
return self._str_map(lambda x: x.zfill(width))

pandas/core/strings/accessor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1912,8 +1912,8 @@ def zfill(self, width: int):
19121912
if not is_integer(width):
19131913
msg = f"width must be of integer type, not {type(width).__name__}"
19141914
raise TypeError(msg)
1915-
f = lambda x: x.zfill(width)
1916-
result = self._data.array._str_map(f)
1915+
1916+
result = self._data.array._str_zfill(width)
19171917
return self._wrap_result(result)
19181918

19191919
def slice(self, start=None, stop=None, step=None):

pandas/core/strings/object_array.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,3 +544,6 @@ def f(x):
544544
return empty_row
545545

546546
return [f(val) for val in np.asarray(self)]
547+
548+
def _str_zfill(self, width: int):
549+
return self._str_map(lambda x: x.zfill(width))

pandas/tests/strings/test_string_array.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,19 @@ def test_string_array_extract(nullable_string_dtype):
110110

111111
result = result.astype(object)
112112
tm.assert_equal(result, expected)
113+
114+
115+
@pytest.mark.parametrize(
116+
"values, width, expected",
117+
[
118+
(["a", "ab", "abc", None], 4, ["000a", "00ab", "0abc", None]),
119+
(["1", "-1", "+1", None], 4, ["0001", "-001", "+001", None]),
120+
(["1234", "-1234"], 3, ["1234", "-1234"]),
121+
],
122+
)
123+
def test_string_array_zfill(nullable_string_dtype, values, width, expected):
124+
# GH #61485
125+
s = Series(values, dtype=nullable_string_dtype)
126+
result = s.str.zfill(width)
127+
expected = Series(expected, dtype=nullable_string_dtype)
128+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)