1
1
from __future__ import annotations
2
2
3
3
from functools import partial
4
+ import re
4
5
from typing import (
5
6
TYPE_CHECKING ,
6
7
Any ,
@@ -159,6 +160,33 @@ def _str_slice_replace(
159
160
stop = np .iinfo (np .int64 ).max
160
161
return type (self )(pc .utf8_replace_slice (self ._pa_array , start , stop , repl ))
161
162
163
def _str_replace(
    self,
    pat: str | re.Pattern,
    repl: str | Callable,
    n: int = -1,
    case: bool = True,
    flags: int = 0,
    regex: bool = True,
) -> Self:
    """
    Replace matches of ``pat`` with ``repl`` via pyarrow compute kernels.

    Parameters
    ----------
    pat : str or re.Pattern
        Pattern to search for. A compiled ``re.Pattern`` is rejected.
    repl : str or Callable
        Replacement text. A callable is rejected.
    n : int, default -1
        Forwarded as ``max_replacements``; any negative value is
        forwarded as ``None`` instead.
    case : bool, default True
        Must be truthy; a falsy value is rejected.
    flags : int, default 0
        Must be falsy (``0``); any other value is rejected.
    regex : bool, default True
        Selects ``pc.replace_substring_regex`` when truthy, otherwise
        ``pc.replace_substring``.

    Returns
    -------
    Self
        A new instance of ``type(self)`` wrapping the kernel result.

    Raises
    ------
    NotImplementedError
        If ``pat`` is a compiled pattern, ``repl`` is callable, ``case`` is
        falsy, or ``flags`` is truthy.
    """
    unsupported = isinstance(pat, re.Pattern) or callable(repl) or not case or flags
    if unsupported:
        raise NotImplementedError(
            "replace is not supported with a re.Pattern, callable repl, "
            "case=False, or flags!=0"
        )

    if regex:
        kernel = pc.replace_substring_regex
    else:
        kernel = pc.replace_substring

    # pyarrow shows unexpected behavior when given a negative
    # max_replacements (GH 56404,
    # https://github.com/apache/arrow/issues/39149), so hand it None instead.
    if n < 0:
        limit = None
    else:
        limit = n

    replaced = kernel(
        self._pa_array,
        pattern=pat,
        replacement=repl,
        max_replacements=limit,
    )
    return type(self)(replaced)
189
+
162
190
def _str_capitalize(self) -> Self:
    """Run ``pc.utf8_capitalize`` on ``self._pa_array`` and rewrap the result."""
    cls = type(self)
    capitalized = pc.utf8_capitalize(self._pa_array)
    return cls(capitalized)
164
192
@@ -168,6 +196,16 @@ def _str_title(self) -> Self:
168
196
def _str_swapcase(self) -> Self:
    """Run ``pc.utf8_swapcase`` on ``self._pa_array`` and rewrap the result."""
    cls = type(self)
    swapped = pc.utf8_swapcase(self._pa_array)
    return cls(swapped)
170
198
199
def _str_removeprefix(self, prefix: str):
    """
    Strip ``prefix`` from the start of each element that begins with it.

    Two code paths are used, chosen by the ``pa_version_under13p0`` flag
    (presumably true on pyarrow releases older than 13.0 -- confirm against
    the compat module):

    * flag set: each value is processed with ``str.removeprefix`` through
      ``self._apply_elementwise`` and the output is wrapped in a
      ``pa.chunked_array``;
    * flag unset: pyarrow compute kernels are used -- elements for which
      ``pc.starts_with`` is true are replaced by their slice starting at
      ``len(prefix)``, all others are kept unchanged.

    Parameters
    ----------
    prefix : str
        Leading text to remove.

    Returns
    -------
    A new instance of ``type(self)`` holding the stripped values.
    """
    if pa_version_under13p0:
        # Fallback path: defer to Python's str.removeprefix per element.
        stripped = self._apply_elementwise(lambda val: val.removeprefix(prefix))
        return type(self)(pa.chunked_array(stripped))

    has_prefix = pc.starts_with(self._pa_array, pattern=prefix)
    without_prefix = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
    # Only swap in the sliced value where the prefix was actually present.
    chosen = pc.if_else(has_prefix, without_prefix, self._pa_array)
    return type(self)(chosen)
208
+
171
209
def _str_removesuffix (self , suffix : str ):
172
210
ends_with = pc .ends_with (self ._pa_array , pattern = suffix )
173
211
removed = pc .utf8_slice_codeunits (self ._pa_array , 0 , stop = - len (suffix ))
0 commit comments