Skip to content

Commit 66afa9d

Browse files
ERR (string dtype): harmonize setitem error message for python and pyarrow storage
1 parent e49ab80 commit 66afa9d

File tree

7 files changed: 31 additions (+31) and 25 deletions (-25).

pandas/core/arrays/arrow/array.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,7 +1145,7 @@ def fillna(
11451145
try:
11461146
fill_value = self._box_pa(value, pa_type=self._pa_array.type)
11471147
except pa.ArrowTypeError as err:
1148-
msg = f"Invalid value '{value!s}' for dtype {self.dtype}"
1148+
msg = f"Invalid value '{value!s}' for dtype '{self.dtype}'"
11491149
raise TypeError(msg) from err
11501150

11511151
try:
@@ -2136,7 +2136,7 @@ def _maybe_convert_setitem_value(self, value):
21362136
try:
21372137
value = self._box_pa(value, self._pa_array.type)
21382138
except pa.ArrowTypeError as err:
2139-
msg = f"Invalid value '{value!s}' for dtype {self.dtype}"
2139+
msg = f"Invalid value '{value!s}' for dtype '{self.dtype}'"
21402140
raise TypeError(msg) from err
21412141
return value
21422142

pandas/core/arrays/masked.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ def _validate_setitem_value(self, value):
286286

287287
# Note: without the "str" here, the f-string rendering raises in
288288
# py38 builds.
289-
raise TypeError(f"Invalid value '{value!s}' for dtype {self.dtype}")
289+
raise TypeError(f"Invalid value '{value!s}' for dtype '{self.dtype}'")
290290

291291
def __setitem__(self, key, value) -> None:
292292
key = check_array_indexer(self, key)

pandas/core/arrays/string_.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -641,7 +641,8 @@ def _validate_scalar(self, value):
641641
return self.dtype.na_value
642642
elif not isinstance(value, str):
643643
raise TypeError(
644-
f"Cannot set non-string value '{value}' into a string array."
644+
f"Invalid value '{value}' for dtype '{self.dtype}'. Value should be a "
645+
f"string or missing value, got '{type(value).__name__}' instead."
645646
)
646647
return value
647648

@@ -732,7 +733,9 @@ def __setitem__(self, key, value) -> None:
732733
value = self.dtype.na_value
733734
elif not isinstance(value, str):
734735
raise TypeError(
735-
f"Cannot set non-string value '{value}' into a StringArray."
736+
f"Invalid value '{value}' for dtype '{self.dtype}'. Value should "
737+
f"be a string or missing value, got '{type(value).__name__}' "
738+
"instead."
736739
)
737740
else:
738741
if not is_array_like(value):
@@ -742,7 +745,10 @@ def __setitem__(self, key, value) -> None:
742745
# compatible, compatibility with arrow backed strings
743746
value = np.asarray(value)
744747
if len(value) and not lib.is_string_array(value, skipna=True):
745-
raise TypeError("Must provide strings.")
748+
raise TypeError(
749+
"Invalid value for dtype 'str'. Value should be a "
750+
"string or missing value (or array of those)."
751+
)
746752

747753
mask = isna(value)
748754
if mask.any():

pandas/core/arrays/string_arrow.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,10 @@ def insert(self, loc: int, item) -> ArrowStringArray:
216216
if self.dtype.na_value is np.nan and item is np.nan:
217217
item = libmissing.NA
218218
if not isinstance(item, str) and item is not libmissing.NA:
219-
raise TypeError("Scalar must be NA or str")
219+
raise TypeError(
220+
f"Invalid value '{item}' for dtype 'str'. Value should be a "
221+
f"string or missing value, got '{type(item).__name__}' instead."
222+
)
220223
return super().insert(loc, item)
221224

222225
def _convert_bool_result(self, values, na=lib.no_default, method_name=None):
@@ -248,13 +251,19 @@ def _maybe_convert_setitem_value(self, value):
248251
if isna(value):
249252
value = None
250253
elif not isinstance(value, str):
251-
raise TypeError("Scalar must be NA or str")
254+
raise TypeError(
255+
f"Invalid value '{value}' for dtype 'str'. Value should be a "
256+
f"string or missing value, got '{type(value).__name__}' instead."
257+
)
252258
else:
253259
value = np.array(value, dtype=object, copy=True)
254260
value[isna(value)] = None
255261
for v in value:
256262
if not (v is None or isinstance(v, str)):
257-
raise TypeError("Must provide strings")
263+
raise TypeError(
264+
"Invalid value for dtype 'str'. Value should be a "
265+
"string or missing value (or array of those)."
266+
)
258267
return super()._maybe_convert_setitem_value(value)
259268

260269
def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:

pandas/tests/arrays/string_/test_string.py

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -109,14 +109,11 @@ def test_none_to_nan(cls, dtype):
109109
def test_setitem_validates(cls, dtype):
110110
arr = cls._from_sequence(["a", "b"], dtype=dtype)
111111

112-
if dtype.storage == "python":
113-
msg = "Cannot set non-string value '10' into a StringArray."
114-
else:
115-
msg = "Scalar must be NA or str"
112+
msg = "Invalid value '10' for dtype 'str"
116113
with pytest.raises(TypeError, match=msg):
117114
arr[0] = 10
118115

119-
msg = "Must provide strings"
116+
msg = "Invalid value for dtype 'str"
120117
with pytest.raises(TypeError, match=msg):
121118
arr[:] = np.array([1, 2])
122119

@@ -508,10 +505,7 @@ def test_fillna_args(dtype):
508505
expected = pd.array(["a", "b"], dtype=dtype)
509506
tm.assert_extension_array_equal(res, expected)
510507

511-
if dtype.storage == "pyarrow":
512-
msg = "Invalid value '1' for dtype str"
513-
else:
514-
msg = "Cannot set non-string value '1' into a StringArray."
508+
msg = "Invalid value '1' for dtype 'str"
515509
with pytest.raises(TypeError, match=msg):
516510
arr.fillna(value=1)
517511

@@ -727,10 +721,7 @@ def test_setitem_scalar_with_mask_validation(dtype):
727721

728722
# for other non-string we should also raise an error
729723
ser = pd.Series(["a", "b", "c"], dtype=dtype)
730-
if dtype.storage == "python":
731-
msg = "Cannot set non-string value"
732-
else:
733-
msg = "Scalar must be NA or str"
724+
msg = "Invalid value '1' for dtype 'str"
734725
with pytest.raises(TypeError, match=msg):
735726
ser[mask] = 1
736727

pandas/tests/frame/indexing/test_where.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1030,7 +1030,7 @@ def test_where_int_overflow(replacement, using_infer_string):
10301030
df = DataFrame([[1.0, 2e25, "nine"], [np.nan, 0.1, None]])
10311031
if using_infer_string and replacement not in (None, "snake"):
10321032
with pytest.raises(
1033-
TypeError, match="Cannot set non-string value|Scalar must be NA or str"
1033+
TypeError, match=f"Invalid value '{replacement}' for dtype 'str'"
10341034
):
10351035
df.where(pd.notnull(df), replacement)
10361036
return

pandas/tests/series/indexing/test_setitem.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -865,7 +865,7 @@ def test_index_where(self, obj, key, expected, raises, val, using_infer_string):
865865
mask[key] = True
866866

867867
if using_infer_string and obj.dtype == object:
868-
with pytest.raises(TypeError, match="Scalar must"):
868+
with pytest.raises(TypeError, match="Invalid value"):
869869
Index(obj).where(~mask, val)
870870
else:
871871
res = Index(obj).where(~mask, val)
@@ -878,7 +878,7 @@ def test_index_putmask(self, obj, key, expected, raises, val, using_infer_string
878878
mask[key] = True
879879

880880
if using_infer_string and obj.dtype == object:
881-
with pytest.raises(TypeError, match="Scalar must"):
881+
with pytest.raises(TypeError, match="Invalid value"):
882882
Index(obj).putmask(mask, val)
883883
else:
884884
res = Index(obj).putmask(mask, val)

0 commit comments

Comments
 (0)