Skip to content

Commit 0eee625

Browse files
fix constructor to not convert to NA
1 parent 63a7fc5 commit 0eee625

File tree

3 files changed

+28
-1
lines changed

3 files changed

+28
-1
lines changed

pandas/_testing/asserters.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -805,6 +805,16 @@ def assert_extension_array_equal(
805805
left_na, right_na, obj=f"{obj} NA mask", index_values=index_values
806806
)
807807

808+
# Specifically for StringArrayNumpySemantics, also validate here that missing values are stored as NaN
809+
if isinstance(left.dtype, StringDtype) and left.dtype.storage == "python_numpy":
810+
assert np.all(
811+
[np.isnan(val) for val in left._ndarray[left_na]]
812+
), "wrong missing value sentinels"
813+
if isinstance(right.dtype, StringDtype) and right.dtype.storage == "python_numpy":
814+
assert np.all(
815+
[np.isnan(val) for val in right._ndarray[right_na]]
816+
), "wrong missing value sentinels"
817+
808818
left_valid = left[~left_na].to_numpy(dtype=object)
809819
right_valid = right[~right_na].to_numpy(dtype=object)
810820
if check_exact:

pandas/core/arrays/string_.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,19 @@ def _str_map(
697697
class StringArrayNumpySemantics(StringArray):
698698
_storage = "python_numpy"
699699

700+
def _validate(self) -> None:
701+
"""Validate that we only store NaN or strings."""
702+
if len(self._ndarray) and not lib.is_string_array(self._ndarray, skipna=True):
703+
raise ValueError(
704+
"StringArrayNumpySemantics requires a sequence of strings or NaN"
705+
)
706+
if self._ndarray.dtype != "object":
707+
raise ValueError(
708+
"StringArrayNumpySemantics requires a sequence of strings or NaN. Got "
709+
f"'{self._ndarray.dtype}' dtype instead."
710+
)
711+
# TODO: validate that missing values are NaN, or coerce NA/None to NaN
712+
700713
@classmethod
701714
def _from_sequence(
702715
cls, scalars, *, dtype: Dtype | None = None, copy: bool = False

pandas/tests/arrays/string_/test_string.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,8 +325,10 @@ def test_comparison_methods_array(comparison_op, dtype):
325325

326326

327327
def test_constructor_raises(cls):
328-
if cls is pd.arrays.StringArray or cls is StringArrayNumpySemantics:
328+
if cls is pd.arrays.StringArray:
329329
msg = "StringArray requires a sequence of strings or pandas.NA"
330+
elif cls is StringArrayNumpySemantics:
331+
msg = "StringArrayNumpySemantics requires a sequence of strings or NaN"
330332
else:
331333
msg = "Unsupported type '<class 'numpy.ndarray'>' for ArrowExtensionArray"
332334

@@ -377,6 +379,8 @@ def test_from_sequence_no_mutate(copy, cls, dtype):
377379
import pyarrow as pa
378380

379381
expected = cls(pa.array(na_arr, type=pa.string(), from_pandas=True))
382+
elif cls is StringArrayNumpySemantics:
383+
expected = cls(nan_arr)
380384
else:
381385
expected = cls(na_arr)
382386

0 commit comments

Comments
 (0)