Skip to content

Commit 79ecf26

Browse files
committed
Implement 'SRE_Pattern.split'.
1 parent 8bd62c1 commit 79ecf26

File tree

2 files changed

+75
-0
lines changed

2 files changed

+75
-0
lines changed

graalpython/com.oracle.graal.python.test/src/tests/test_re.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,59 @@ def test_symbolic_groups(self):
244244
self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3)
245245
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
246246

247+
def test_re_split(self):
    """Check ``re.split`` on text and bytes-like subjects.

    Covers plain separators, capturing and non-capturing groups,
    separators that are expected to emit a ``FutureWarning`` and
    zero-width separators that are expected to raise ``ValueError``.
    """
    subject = ":a:b::c"
    raw_subject = b":a:b::c"

    # Text subjects: a plain str and the S helper type.
    text_cases = (
        (":", ['', 'a', 'b', '', 'c']),
        (":+", ['', 'a', 'b', 'c']),
        ("(:+)", ['', ':', 'a', ':', 'b', '::', 'c']),
    )
    for string in (subject, S(subject)):
        for sep, expected in text_cases:
            self.assertTypedEqual(re.split(sep, string), expected)

    # Bytes-like subjects: bytes, the B helper type, bytearray, memoryview.
    bytes_cases = (
        (b":", [b'', b'a', b'b', b'', b'c']),
        (b":+", [b'', b'a', b'b', b'c']),
        (b"(:+)", [b'', b':', b'a', b':', b'b', b'::', b'c']),
    )
    bytes_subjects = (
        raw_subject,
        B(raw_subject),
        bytearray(raw_subject),
        memoryview(raw_subject),
    )
    for string in bytes_subjects:
        for sep, expected in bytes_cases:
            self.assertTypedEqual(re.split(sep, string), expected)

    # Each three-character string below is unpacked into a, b and c.
    for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432",
                    "\U0001d49c\U0001d49e\U0001d4b5"):
        string = ":%s:%s::%s" % (a, b, c)
        unicode_cases = (
            (":", ['', a, b, '', c]),
            (":+", ['', a, b, c]),
            ("(:+)", ['', ':', a, ':', b, '::', c]),
        )
        for sep, expected in unicode_cases:
            self.assertEqual(re.split(sep, string), expected)

    # Capturing versus non-capturing groups in the separator.
    group_cases = (
        ("(?::+)", ['', 'a', 'b', 'c']),
        ("(:)+", ['', ':', 'a', ':', 'b', ':', 'c']),
        ("([b:]+)", ['', ':', 'a', ':b::', 'c']),
        ("(b)|(:+)", ['', None, ':', 'a', None, ':', '', 'b', None, '',
                      None, '::', 'c']),
        ("(?:b)|(?::+)", ['', 'a', '', '', 'c']),
    )
    for sep, expected in group_cases:
        self.assertEqual(re.split(sep, subject), expected)

    # Separators for which a FutureWarning is expected.
    warning_cases = [
        (':*', ['', 'a', 'b', 'c']),
        ('(?::*)', ['', 'a', 'b', 'c']),
        ('(:*)', ['', ':', 'a', ':', 'b', '::', 'c']),
        ('(:)*', ['', ':', 'a', ':', 'b', ':', 'c']),
    ]
    for sep, expected in warning_cases:
        with self.subTest(sep=sep):
            with self.assertWarns(FutureWarning):
                self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)

    # Zero-width separators for which a ValueError is expected.
    error_cases = [
        ('', [':a:b::c']),
        (r'\b', [':a:b::c']),
        (r'(?=:)', [':a:b::c']),
        (r'(?<=:)', [':a:b::c']),
    ]
    for sep, expected in error_cases:
        with self.subTest(sep=sep):
            with self.assertRaises(ValueError):
                self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
299+
247300
def test_ignore_case_set(self):
248301
self.assertTrue(re.match(r'[19A]', 'A', re.I))
249302
self.assertTrue(re.match(r'[19a]', 'a', re.I))

graalpython/lib-graalpython/_sre.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,28 @@ def sub(self, repl, string, count=0):
354354
except BaseException:
355355
return self.__compile_cpython_sre().sub(repl, string, count)
356356

357+
def split(self, string, maxsplit=0):
    """Split ``string`` by the occurrences of this pattern.

    Mirrors ``re.split``: the text between consecutive matches is
    returned as a list and, when the pattern contains capture groups,
    the text of every group (``None`` for a group that did not take
    part in the match) is inserted right after the piece that precedes
    the match.

    The TRegex engine is tried first. If it raises, or if it yields an
    empty match (whose treatment by ``re.split`` differs between Python
    versions), the whole call is delegated to the pattern returned by
    ``__compile_cpython_sre``.

    :param string: the subject to split.
    :param maxsplit: maximum number of splits to perform; ``0`` (the
        default) means no limit.
    :return: list with the pieces of ``string`` and any captured groups.
    """
    result = None
    try:
        pattern = self.__tregex_compile(self.pattern)
        # NOTE(review): ``groupCount`` is assumed to be the member of the
        # TRegex compiled-regex object that counts all groups including
        # group 0 -- confirm. If the member is missing, the lookup raises
        # and the handler below selects the fallback implementation.
        group_count = pattern.groupCount
        pieces = []
        pos = 0
        n = 0
        while (maxsplit == 0 or n < maxsplit) and pos <= len(string):
            match_result = tregex_call_safe(pattern.exec, string, pos)
            if not match_result.isMatch:
                break
            start = match_result.start[0]
            end = match_result.end[0]
            if start == end:
                # An empty match cannot simply end the scan: later
                # separators would be left unsplit. Defer to the
                # fallback implementation instead of guessing.
                pieces = None
                break
            n += 1
            pieces.append(string[pos:start])
            for group in range(1, group_count):
                group_start = match_result.start[group]
                # assumes a negative start marks a group that did not
                # participate in the match -- TODO confirm
                if group_start < 0:
                    pieces.append(None)
                else:
                    pieces.append(string[group_start:match_result.end[group]])
            pos = end
        if pieces is not None:
            # Everything after the last separator is the final piece.
            pieces.append(string[pos:])
            result = pieces
    except BaseException:
        # Deliberate best effort: any failure in the TRegex path selects
        # the fallback implementation below.
        result = None
    if result is None:
        return self.__compile_cpython_sre().split(string, maxsplit)
    return result
378+
357379

358380
# Module-level alias: calling ``compile(...)`` instantiates ``SRE_Pattern``.
compile = SRE_Pattern
359381

0 commit comments

Comments
 (0)