Skip to content

Commit 7e0649f

Browse files
committed
update
1 parent 1a0e783 commit 7e0649f

File tree

5 files changed

+14
-21
lines changed

5 files changed

+14
-21
lines changed

pandas/core/arrays/_mixins.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -411,17 +411,10 @@ def _where(self: Self, mask: npt.NDArray[np.bool_], value) -> Self:
411411
"""
412412
value = self._validate_setitem_value(value)
413413

414-
# Note: For backwards compatibility purposes
415-
# StringArray returns an object array in __array__
416-
# when it is backed by a numpy StringDType
417-
# We need to work around that here.
418-
if hasattr(value, "_ndarray") and value._ndarray.dtype.kind == "T":
419-
value = value._ndarray
420-
421-
# np.where will not preserve the StringDType
422-
# TODO: ask Nathan about this
423-
# also TODO: this is a mess
424414
if self._ndarray.dtype.kind == "T":
415+
# Handle non-string values and NumPy StringDType
416+
# explicitly, since we don't want to end up with object dtype
417+
# and lose the string dtype
425418
if value is np.nan:
426419
value = libmissing.NA
427420
res_values = self._ndarray.copy()

pandas/core/arrays/numpy_.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ def __array__(
166166
self, dtype: NpDtype | None = None, copy: bool | None = None
167167
) -> np.ndarray:
168168
array = self._ndarray
169-
# to_numpy on StringArray backed by StringDType should still return object dtype
169+
# np.array on StringArray backed by StringDType should still return object dtype
170170
# for backwards compat
171171
if self._ndarray.dtype.kind == "T":
172172
array = array.astype(object)
@@ -516,8 +516,8 @@ def to_numpy(
516516
# to_numpy on StringArray backed by StringDType should still return object dtype
517517
# for backwards compat
518518
array = self._ndarray
519-
if self._ndarray.dtype.kind == "T":
520-
array = array.astype(object)
519+
if dtype is None and self._ndarray.dtype.kind == "T":
520+
dtype = object
521521
result = np.asarray(array, dtype=dtype)
522522
if na_value is not lib.no_default and mask.any():
523523
result = result.copy()

pandas/core/strings/object_array.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -623,7 +623,9 @@ def _str_isupper(self) -> BooleanArray:
623623
def _str_len(self) -> IntegerArray:
624624
if self._ndarray.dtype == object:
625625
return super()._str_len()
626-
result = np.strings.str_len(self._ndarray)
626+
na_mask = isna(self._ndarray)
627+
result = np.empty_like(self._ndarray, dtype="int64")
628+
result[~na_mask] = np.strings.str_len(self._ndarray[~na_mask])
627629
return IntegerArray(result, isna(self._ndarray))
628630

629631
def _str_lstrip(self, to_strip=None):

pandas/tests/base/test_misc.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -103,19 +103,17 @@ def test_memory_usage(index_or_series_memory_obj, request):
103103
is_categorical = isinstance(obj.dtype, pd.CategoricalDtype) or (
104104
is_ser and isinstance(obj.index.dtype, pd.CategoricalDtype)
105105
)
106-
is_object_string = is_dtype_equal(obj, "string[python]") or (
106+
is_string_array = is_dtype_equal(obj, "string[python]") or (
107107
is_ser and is_dtype_equal(obj.index.dtype, "string[python]")
108108
)
109-
if is_object_string and np_version_gt2:
110-
mark = pytest.mark.xfail(
111-
True, reason="NumPy does not expose an API to get StringDType memory usage"
112-
)
109+
if is_string_array and np_version_gt2:
110+
mark = pytest.mark.xfail(reason="NumPy does not expose an API to get StringDType memory usage")
113111
request.applymarker(mark)
114112

115113
if len(obj) == 0:
116114
expected = 0
117115
assert res_deep == res == expected
118-
elif is_object or is_categorical:
116+
elif is_object or is_categorical or is_string_array:
119117
# only deep will pick them up
120118
assert res_deep > res
121119
else:

pandas/tests/frame/test_constructors.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3068,7 +3068,7 @@ def test_np_string_array(self, data):
30683068

30693069
data["a"] = np.array(data["a"], dtype=StringDType())
30703070
res = DataFrame(data)
3071-
assert res["a"].dtype == np.dtypes.StringDType()
3071+
assert res["a"].dtype == np.object_
30723072
assert (res["a"] == data["a"]).all()
30733073

30743074

0 commit comments

Comments
 (0)