Document and enforce that Series.str.rsplit only accepts plain-string
separators: the shared split/rsplit docstring now says so, rsplit raises
TypeError when ``pat`` is neither None nor a str, and test_rsplit covers the
new error for a compiled pattern.

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index c108808905dc7..f4fa32feae28a 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -737,12 +737,14 @@ def cat(
     %(raises_split)s
     See Also
     --------
-    Series.str.split : Split strings around given separator/delimiter.
+    Series.str.split : Split strings around given separator/delimiter or
+        regular expression.
     Series.str.rsplit : Splits string around given separator/delimiter,
         starting from the right.
     Series.str.join : Join lists contained as elements in the Series/Index
         with passed delimiter.
-    str.split : Standard library version for split.
+    re.split : Standard library version for split with ``regex=True``.
+    str.split : Standard library version for split with ``regex=False``.
     str.rsplit : Standard library version for rsplit.
 
     Notes
@@ -931,7 +933,7 @@ def split(
         % {
             "side": "end",
             "pat_regex": "",
-            "pat_description": "String to split on",
+            "pat_description": "String to split on. Does not support regex",
             "regex_argument": "",
             "raises_split": "",
             "regex_pat_note": "",
@@ -941,6 +943,9 @@ def split(
     )
     @forbid_nonstring_types(["bytes"])
     def rsplit(self, pat=None, *, n=-1, expand: bool = False):
+        if pat is not None and not isinstance(pat, str):
+            msg = f"expected a string object, not {type(pat).__name__}"
+            raise TypeError(msg)
         result = self._data.array._str_rsplit(pat, n=n)
         dtype = object if self._data.dtype == object else None
         return self._wrap_result(
diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py
index 4fab6e7778002..d5176df101141 100644
--- a/pandas/tests/strings/test_split_partition.py
+++ b/pandas/tests/strings/test_split_partition.py
@@ -122,13 +122,16 @@ def test_split_n(any_string_dtype, method, n):
 
 
 def test_rsplit(any_string_dtype):
-    # regex split is not supported by rsplit
     values = Series(["a,b_c", "c_d,e", np.nan, "f,g,h"], dtype=any_string_dtype)
     result = values.str.rsplit("[,_]")
     exp = Series([["a,b_c"], ["c_d,e"], np.nan, ["f,g,h"]])
     exp = _convert_na_value(values, exp)
     tm.assert_series_equal(result, exp)
 
+    # regex split is not supported by rsplit
+    with pytest.raises(TypeError, match="expected a string object, not Pattern"):
+        values.str.rsplit(re.compile("[,_]"))
+
 
 def test_rsplit_max_number(any_string_dtype):
     # setting max number of splits, make sure it's from reverse