Skip to content

Commit 3173767

Browse files
committed
BUG: raise a proper exception when str.rsplit is passed a regex and clarify the docs
1 parent 364ca58 commit 3173767

File tree

2 files changed

Lines changed: 12 additions & 4 deletions

2 files changed

Lines changed: 12 additions & 4 deletions

pandas/core/strings/accessor.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -737,12 +737,14 @@ def cat(
737737
%(raises_split)s
738738
See Also
739739
--------
740-
Series.str.split : Split strings around given separator/delimiter.
740+
Series.str.split : Split strings around given separator/delimiter or
741+
regular expression.
741742
Series.str.rsplit : Splits string around given separator/delimiter,
742743
starting from the right.
743744
Series.str.join : Join lists contained as elements in the Series/Index
744745
with passed delimiter.
745-
str.split : Standard library version for split.
746+
re.split : Standard library version for split with ``regex=True``.
747+
str.split : Standard library version for split with ``regex=False``.
746748
str.rsplit : Standard library version for rsplit.
747749
748750
Notes
@@ -931,7 +933,7 @@ def split(
931933
% {
932934
"side": "end",
933935
"pat_regex": "",
934-
"pat_description": "String to split on",
936+
"pat_description": "String to split on. Does not support regex",
935937
"regex_argument": "",
936938
"raises_split": "",
937939
"regex_pat_note": "",
@@ -941,6 +943,9 @@ def split(
941943
)
942944
@forbid_nonstring_types(["bytes"])
943945
def rsplit(self, pat=None, *, n=-1, expand: bool = False):
946+
if pat is not None and not isinstance(pat, str):
947+
msg = f"expected a string object, not {type(pat).__name__}"
948+
raise TypeError(msg)
944949
result = self._data.array._str_rsplit(pat, n=n)
945950
dtype = object if self._data.dtype == object else None
946951
return self._wrap_result(

pandas/tests/strings/test_split_partition.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,13 +122,16 @@ def test_split_n(any_string_dtype, method, n):
122122

123123

124124
def test_rsplit(any_string_dtype):
125-
# regex split is not supported by rsplit
126125
values = Series(["a,b_c", "c_d,e", np.nan, "f,g,h"], dtype=any_string_dtype)
127126
result = values.str.rsplit("[,_]")
128127
exp = Series([["a,b_c"], ["c_d,e"], np.nan, ["f,g,h"]])
129128
exp = _convert_na_value(values, exp)
130129
tm.assert_series_equal(result, exp)
131130

131+
# regex split is not supported by rsplit
132+
with pytest.raises(TypeError, match="expected a string object, not Pattern"):
133+
values.str.rsplit(re.compile("[,_]"))
134+
132135

133136
def test_rsplit_max_number(any_string_dtype):
134137
# setting max number of splits, make sure it's from reverse

0 commit comments

Comments
 (0)