diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index b6f1412066574..7db10b1cc4a80 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1145,7 +1145,7 @@ def fillna( try: fill_value = self._box_pa(value, pa_type=self._pa_array.type) except pa.ArrowTypeError as err: - msg = f"Invalid value '{value!s}' for dtype {self.dtype}" + msg = f"Invalid value '{value!s}' for dtype '{self.dtype}'" raise TypeError(msg) from err try: @@ -2136,7 +2136,7 @@ def _maybe_convert_setitem_value(self, value): try: value = self._box_pa(value, self._pa_array.type) except pa.ArrowTypeError as err: - msg = f"Invalid value '{value!s}' for dtype {self.dtype}" + msg = f"Invalid value '{value!s}' for dtype '{self.dtype}'" raise TypeError(msg) from err return value diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 349d2ec4d3cc9..f3a0cc0dccdb3 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -286,7 +286,7 @@ def _validate_setitem_value(self, value): # Note: without the "str" here, the f-string rendering raises in # py38 builds. - raise TypeError(f"Invalid value '{value!s}' for dtype {self.dtype}") + raise TypeError(f"Invalid value '{value!s}' for dtype '{self.dtype}'") def __setitem__(self, key, value) -> None: key = check_array_indexer(self, key) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 2954edd93e343..01619dab7ce45 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -652,7 +652,8 @@ def _validate_scalar(self, value): return self.dtype.na_value elif not isinstance(value, str): raise TypeError( - f"Cannot set non-string value '{value}' into a string array." + f"Invalid value '{value}' for dtype '{self.dtype}'. Value should be a " + f"string or missing value, got '{type(value).__name__}' instead." ) return value @@ -743,7 +744,9 @@ def __setitem__(self, key, value) -> None: value = self.dtype.na_value elif not isinstance(value, str): raise TypeError( - f"Cannot set non-string value '{value}' into a StringArray." + f"Invalid value '{value}' for dtype '{self.dtype}'. Value should " + f"be a string or missing value, got '{type(value).__name__}' " + "instead." ) else: if not is_array_like(value): @@ -753,7 +756,10 @@ def __setitem__(self, key, value) -> None: # compatible, compatibility with arrow backed strings value = np.asarray(value) if len(value) and not lib.is_string_array(value, skipna=True): - raise TypeError("Must provide strings.") + raise TypeError( + "Invalid value for dtype 'str'. Value should be a " + "string or missing value (or array of those)." + ) mask = isna(value) if mask.any(): diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 75e36feea2628..27c1425d11ac6 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -223,7 +223,10 @@ def insert(self, loc: int, item) -> ArrowStringArray: if self.dtype.na_value is np.nan and item is np.nan: item = libmissing.NA if not isinstance(item, str) and item is not libmissing.NA: - raise TypeError("Scalar must be NA or str") + raise TypeError( + f"Invalid value '{item}' for dtype 'str'. Value should be a " + f"string or missing value, got '{type(item).__name__}' instead." + ) return super().insert(loc, item) def _convert_bool_result(self, values, na=lib.no_default, method_name=None): @@ -255,13 +258,19 @@ def _maybe_convert_setitem_value(self, value): if isna(value): value = None elif not isinstance(value, str): - raise TypeError("Scalar must be NA or str") + raise TypeError( + f"Invalid value '{value}' for dtype 'str'. Value should be a " + f"string or missing value, got '{type(value).__name__}' instead." + ) else: value = np.array(value, dtype=object, copy=True) value[isna(value)] = None for v in value: if not (v is None or isinstance(v, str)): - raise TypeError("Must provide strings") + raise TypeError( + "Invalid value for dtype 'str'. Value should be a " + "string or missing value (or array of those)." + ) return super()._maybe_convert_setitem_value(value) def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]: diff --git a/pandas/tests/arrays/masked/test_indexing.py b/pandas/tests/arrays/masked/test_indexing.py index 37f38a11cbeae..753d562c87ffa 100644 --- a/pandas/tests/arrays/masked/test_indexing.py +++ b/pandas/tests/arrays/masked/test_indexing.py @@ -8,7 +8,7 @@ class TestSetitemValidation: def _check_setitem_invalid(self, arr, invalid): - msg = f"Invalid value '{invalid!s}' for dtype {arr.dtype}" + msg = f"Invalid value '{invalid!s}' for dtype '{arr.dtype}'" msg = re.escape(msg) with pytest.raises(TypeError, match=msg): arr[0] = invalid diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index a18161f47039b..a32ac7db4656a 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -109,14 +109,11 @@ def test_none_to_nan(cls, dtype): def test_setitem_validates(cls, dtype): arr = cls._from_sequence(["a", "b"], dtype=dtype) - if dtype.storage == "python": - msg = "Cannot set non-string value '10' into a StringArray." - else: - msg = "Scalar must be NA or str" + msg = "Invalid value '10' for dtype 'str" with pytest.raises(TypeError, match=msg): arr[0] = 10 - msg = "Must provide strings" + msg = "Invalid value for dtype 'str" with pytest.raises(TypeError, match=msg): arr[:] = np.array([1, 2]) @@ -508,10 +505,7 @@ def test_fillna_args(dtype): expected = pd.array(["a", "b"], dtype=dtype) tm.assert_extension_array_equal(res, expected) - if dtype.storage == "pyarrow": - msg = "Invalid value '1' for dtype str" - else: - msg = "Cannot set non-string value '1' into a StringArray." + msg = "Invalid value '1' for dtype 'str" with pytest.raises(TypeError, match=msg): arr.fillna(value=1) @@ -727,10 +721,7 @@ def test_setitem_scalar_with_mask_validation(dtype): # for other non-string we should also raise an error ser = pd.Series(["a", "b", "c"], dtype=dtype) - if dtype.storage == "python": - msg = "Cannot set non-string value" - else: - msg = "Scalar must be NA or str" + msg = "Invalid value '1' for dtype 'str" with pytest.raises(TypeError, match=msg): ser[mask] = 1 diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index eb14f8bdbfb86..84c01e0be3b6f 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1274,7 +1274,7 @@ def test_setting_mismatched_na_into_nullable_fails( r"timedelta64\[ns\] cannot be converted to (Floating|Integer)Dtype", r"datetime64\[ns\] cannot be converted to (Floating|Integer)Dtype", "'values' contains non-numeric NA", - r"Invalid value '.*' for dtype (U?Int|Float)\d{1,2}", + r"Invalid value '.*' for dtype '(U?Int|Float)\d{1,2}'", ] ) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index f399f71a9ce88..86b39ddd19ec1 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -931,7 +931,7 @@ def test_where_nullable_invalid_na(frame_or_series, any_numeric_ea_dtype): mask = np.array([True, True, False], ndmin=obj.ndim).T - msg = r"Invalid value '.*' for dtype (U?Int|Float)\d{1,2}" + msg = r"Invalid value '.*' for dtype '(U?Int|Float)\d{1,2}'" for null in tm.NP_NAT_OBJECTS + [pd.NaT]: # NaT is an NA value that we should *not* cast to pd.NA dtype @@ -1030,7 +1030,7 @@ def test_where_int_overflow(replacement, using_infer_string): df = DataFrame([[1.0, 2e25, "nine"], [np.nan, 0.1, None]]) if using_infer_string and replacement not in (None, "snake"): with pytest.raises( - TypeError, match="Cannot set non-string value|Scalar must be NA or str" + TypeError, match=f"Invalid value '{replacement}' for dtype 'str'" ): df.where(pd.notnull(df), replacement) return diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 36b08ee1df790..e0e9d4cfc5ccb 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1230,7 +1230,7 @@ def test_loc_setitem_str_to_small_float_conversion_type(self, using_infer_string # assigning with loc/iloc attempts to set the values inplace, which # in this case is successful if using_infer_string: - with pytest.raises(TypeError, match="Must provide strings"): + with pytest.raises(TypeError, match="Invalid value"): result.loc[result.index, "A"] = [float(x) for x in col_data] else: result.loc[result.index, "A"] = [float(x) for x in col_data] diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index d3246f43e991b..ed5cb5a8d1237 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -864,7 +864,7 @@ def test_index_where(self, obj, key, expected, raises, val, using_infer_string): mask[key] = True if using_infer_string and obj.dtype == object: - with pytest.raises(TypeError, match="Scalar must"): + with pytest.raises(TypeError, match="Invalid value"): Index(obj).where(~mask, val) else: res = Index(obj).where(~mask, val) @@ -877,7 +877,7 @@ def test_index_putmask(self, obj, key, expected, raises, val, using_infer_string mask[key] = True if using_infer_string and obj.dtype == object: - with pytest.raises(TypeError, match="Scalar must"): + with pytest.raises(TypeError, match="Invalid value"): Index(obj).putmask(mask, val) else: res = Index(obj).putmask(mask, val)