Skip to content

Commit 29924ab

Browse files
committed
[GR-28410] More fixes for test_re
PullRequest: graalpython/1609
2 parents 18f1af3 + 673445e commit 29924ab

File tree

3 files changed

+114
-37
lines changed

3 files changed

+114
-37
lines changed

graalpython/com.oracle.graal.python.test/src/tests/unittest_tags/test_re.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
*graalpython.lib-python.3.test.test_re.ImplementationTest.test_overlap_table
33
*graalpython.lib-python.3.test.test_re.PatternReprTests.test_bytes
44
*graalpython.lib-python.3.test.test_re.PatternReprTests.test_flags_repr
5+
*graalpython.lib-python.3.test.test_re.PatternReprTests.test_inline_flags
56
*graalpython.lib-python.3.test.test_re.PatternReprTests.test_locale
67
*graalpython.lib-python.3.test.test_re.PatternReprTests.test_multiple_flags
78
*graalpython.lib-python.3.test.test_re.PatternReprTests.test_quotes
@@ -27,6 +28,7 @@
2728
*graalpython.lib-python.3.test.test_re.ReTests.test_bug_448951
2829
*graalpython.lib-python.3.test.test_re.ReTests.test_bug_449000
2930
*graalpython.lib-python.3.test.test_re.ReTests.test_bug_449964
31+
*graalpython.lib-python.3.test.test_re.ReTests.test_bug_581080
3032
*graalpython.lib-python.3.test.test_re.ReTests.test_bug_612074
3133
*graalpython.lib-python.3.test.test_re.ReTests.test_bug_6509
3234
*graalpython.lib-python.3.test.test_re.ReTests.test_bug_6561
@@ -40,22 +42,30 @@
4042
*graalpython.lib-python.3.test.test_re.ReTests.test_character_set_errors
4143
*graalpython.lib-python.3.test.test_re.ReTests.test_compile
4244
*graalpython.lib-python.3.test.test_re.ReTests.test_constants
45+
*graalpython.lib-python.3.test.test_re.ReTests.test_copying
4346
*graalpython.lib-python.3.test.test_re.ReTests.test_debug_flag
4447
*graalpython.lib-python.3.test.test_re.ReTests.test_dollar_matches_twice
48+
*graalpython.lib-python.3.test.test_re.ReTests.test_empty_array
4549
*graalpython.lib-python.3.test.test_re.ReTests.test_enum
4650
*graalpython.lib-python.3.test.test_re.ReTests.test_error
51+
*graalpython.lib-python.3.test.test_re.ReTests.test_expand
4752
*graalpython.lib-python.3.test.test_re.ReTests.test_flags
53+
*graalpython.lib-python.3.test.test_re.ReTests.test_getattr
54+
*graalpython.lib-python.3.test.test_re.ReTests.test_group
4855
*graalpython.lib-python.3.test.test_re.ReTests.test_group_name_in_exception
4956
*graalpython.lib-python.3.test.test_re.ReTests.test_groupdict
5057
*graalpython.lib-python.3.test.test_re.ReTests.test_ignore_case
5158
*graalpython.lib-python.3.test.test_re.ReTests.test_ignore_case_range
5259
*graalpython.lib-python.3.test.test_re.ReTests.test_ignore_case_set
5360
*graalpython.lib-python.3.test.test_re.ReTests.test_issue17998
61+
*graalpython.lib-python.3.test.test_re.ReTests.test_keep_buffer
62+
*graalpython.lib-python.3.test.test_re.ReTests.test_keyword_parameters
5463
*graalpython.lib-python.3.test.test_re.ReTests.test_large_search
5564
*graalpython.lib-python.3.test.test_re.ReTests.test_large_subn
5665
*graalpython.lib-python.3.test.test_re.ReTests.test_lookahead
5766
*graalpython.lib-python.3.test.test_re.ReTests.test_lookbehind
5867
*graalpython.lib-python.3.test.test_re.ReTests.test_match_getitem
68+
*graalpython.lib-python.3.test.test_re.ReTests.test_match_repr
5969
*graalpython.lib-python.3.test.test_re.ReTests.test_multiple_repeat
6070
*graalpython.lib-python.3.test.test_re.ReTests.test_not_literal
6171
*graalpython.lib-python.3.test.test_re.ReTests.test_nothing_to_repeat

graalpython/lib-graalpython/_sre.py

Lines changed: 102 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
# SOFTWARE.
3939

4040
from mmap import mmap
41+
from array import array
4142

4243
_mappingproxy = type(type.__dict__)
4344

@@ -76,7 +77,7 @@ def __init__(self, compiled_pattern, flags, sticky):
7677
self.__compiled_pattern__ = compiled_pattern
7778
self.__sticky__ = sticky
7879
self.pattern = compiled_pattern.pattern
79-
self.flags = flags
80+
self.flags = {name: bool(flags & flag) for flag, name in FLAG_NAMES}
8081
self.groupCount = 1 + compiled_pattern.groups
8182
self.groups = _NamedCaptureGroups(compiled_pattern.groupindex)
8283

@@ -110,7 +111,7 @@ def fallback_compiler(pattern, flags):
110111

111112
compiled_pattern = _sre_compile(pattern, bit_flags)
112113

113-
return _ExecutablePattern(compiled_pattern, flags, sticky)
114+
return _ExecutablePattern(compiled_pattern, bit_flags, sticky)
114115

115116
def _new_compile(p, flags=0):
116117
if _with_tregex and isinstance(p, (str, bytes)):
@@ -145,19 +146,19 @@ def setup(sre_compiler, error_class, flags_table):
145146
FLAG_DEBUG = 128
146147
FLAG_ASCII = 256
147148
FLAG_NAMES = [
148-
(FLAG_TEMPLATE, "re.TEMPLATE"),
149-
(FLAG_IGNORECASE, "re.IGNORECASE"),
150-
(FLAG_LOCALE, "re.LOCALE"),
151-
(FLAG_MULTILINE, "re.MULTILINE"),
152-
(FLAG_DOTALL, "re.DOTALL"),
153-
(FLAG_UNICODE, "re.UNICODE"),
154-
(FLAG_VERBOSE, "re.VERBOSE"),
155-
(FLAG_DEBUG, "re.DEBUG"),
156-
(FLAG_ASCII, "re.ASCII"),
149+
(FLAG_TEMPLATE, "TEMPLATE"),
150+
(FLAG_IGNORECASE, "IGNORECASE"),
151+
(FLAG_LOCALE, "LOCALE"),
152+
(FLAG_MULTILINE, "MULTILINE"),
153+
(FLAG_DOTALL, "DOTALL"),
154+
(FLAG_UNICODE, "UNICODE"),
155+
(FLAG_VERBOSE, "VERBOSE"),
156+
(FLAG_DEBUG, "DEBUG"),
157+
(FLAG_ASCII, "ASCII"),
157158
]
158159

159160

160-
class SRE_Match():
161+
class Match():
161162
def __init__(self, pattern, pos, endpos, result, input_str, compiled_regex):
162163
self.__result = result
163164
self.__compiled_regex = compiled_regex
@@ -191,10 +192,12 @@ def __getitem__(self, item):
191192

192193
def __groupidx(self, idx):
193194
try:
194-
if isinstance(idx, str):
195+
if hasattr(idx, '__index__'):
196+
int_idx = int(idx)
197+
if 0 <= int_idx < self.__compiled_regex.groupCount:
198+
return int_idx
199+
else:
195200
return self.__compiled_regex.groups[idx]
196-
elif 0 <= idx < self.__compiled_regex.groupCount:
197-
return idx
198201
except Exception:
199202
pass
200203
raise IndexError("no such group")
@@ -204,8 +207,10 @@ def __group(self, idx, default=None):
204207
start = self.__result.getStart(idxarg)
205208
if start < 0:
206209
return default
207-
else:
210+
elif isinstance(self.__input_str, str):
208211
return self.__input_str[start:self.__result.getEnd(idxarg)]
212+
else:
213+
return bytes(self.__input_str[start:self.__result.getEnd(idxarg)])
209214

210215
def groupdict(self, default=None):
211216
groups = self.__compiled_regex.groups
@@ -221,6 +226,14 @@ def start(self, groupnum=0):
221226
idxarg = self.__groupidx(groupnum)
222227
return self.__result.getStart(idxarg)
223228

229+
def expand(self, template):
230+
import re
231+
return re._expand(self.__re, self, template)
232+
233+
@property
234+
def regs(self):
235+
return tuple(self.span(i) for i in range(self.__compiled_regex.groupCount))
236+
224237
@property
225238
def string(self):
226239
return self.__input_str
@@ -252,7 +265,13 @@ def lastindex(self):
252265
return lastindex
253266

254267
def __repr__(self):
255-
return "<re.Match object; span=%r, match=%r>" % (self.span(), self.group())
268+
return "<%s object; span=%r, match=%r>" % (type(self).__name__, self.span(), self.group())
269+
270+
def __copy__(self):
271+
return self
272+
273+
def __deepcopy__(self, memo):
274+
return self
256275

257276
def _append_end_assert(pattern):
258277
if isinstance(pattern, str):
@@ -261,18 +280,18 @@ def _append_end_assert(pattern):
261280
return pattern if pattern.endswith(rb"\Z") else pattern + rb"\Z"
262281

263282
def _is_bytes_like(object):
264-
return isinstance(object, (bytes, bytearray, memoryview, mmap))
283+
return isinstance(object, (bytes, bytearray, memoryview, array, mmap))
265284

266-
class SRE_Pattern():
285+
class Pattern():
267286
def __init__(self, pattern, flags):
268287
self.__binary = isinstance(pattern, bytes)
269288
self.pattern = pattern
270-
self.flags = flags
289+
self.__input_flags = flags
271290
flags_str = []
272-
for char,flag in FLAGS.items():
291+
for char, flag in FLAGS.items():
273292
if flags & flag:
274293
flags_str.append(char)
275-
self.flags_str = "".join(flags_str)
294+
self.__flags_str = "".join(flags_str)
276295
self.__compiled_regexes = {}
277296
compiled_regex = self.__tregex_compile(self.pattern)
278297
self.groups = compiled_regex.groupCount - 1
@@ -283,6 +302,19 @@ def __init__(self, pattern, flags):
283302
group_names = dir(groups)
284303
self.groupindex = _mappingproxy({name: groups[name] for name in group_names})
285304

305+
@property
306+
def flags(self):
307+
# Flags can be specified either in the flag argument or inline in the regex. Extract them back from the regex
308+
flags = self.__input_flags
309+
regex_flags = self.__tregex_compile(self.pattern).flags
310+
for flag, name in FLAG_NAMES:
311+
try:
312+
if regex_flags[name]:
313+
flags |= flag
314+
except KeyError:
315+
pass
316+
return flags
317+
286318
def __check_input_type(self, input):
287319
if not isinstance(input, str) and not _is_bytes_like(input):
288320
raise TypeError("expected string or bytes-like object")
@@ -298,7 +330,7 @@ def __check_pos(pos):
298330

299331
def __tregex_compile(self, pattern, flags=None):
300332
if flags is None:
301-
flags = self.flags_str
333+
flags = self.__flags_str
302334
if (pattern, flags) not in self.__compiled_regexes:
303335
try:
304336
self.__compiled_regexes[(pattern, flags)] = tregex_compile_internal(pattern, flags, fallback_compiler)
@@ -317,7 +349,7 @@ def __repr__(self):
317349
for code, name in FLAG_NAMES:
318350
if flags & code:
319351
flags -= code
320-
flag_items.append(name)
352+
flag_items.append(f're.{name}')
321353
if flags != 0:
322354
flag_items.append("0x%x" % flags)
323355
if len(flag_items) == 0:
@@ -331,15 +363,21 @@ def __repr__(self):
331363
def __eq__(self, other):
332364
if self is other:
333365
return True
334-
if type(other) != SRE_Pattern:
366+
if type(other) != Pattern:
335367
return NotImplemented
336368
return self.pattern == other.pattern and self.flags == other.flags
337369

338370
def __hash__(self):
339371
return hash(self.pattern) * 31 ^ hash(self.flags)
340372

373+
def __copy__(self):
374+
return self
375+
376+
def __deepcopy__(self, memo):
377+
return self
378+
341379
def _search(self, pattern, string, pos, endpos, sticky=False):
342-
pattern = self.__tregex_compile(pattern, self.flags_str + ("y" if sticky else ""))
380+
pattern = self.__tregex_compile(pattern, self.__flags_str + ("y" if sticky else ""))
343381
input_str = string
344382
if endpos == -1 or endpos >= len(string):
345383
endpos = len(string)
@@ -348,7 +386,7 @@ def _search(self, pattern, string, pos, endpos, sticky=False):
348386
input_str = string[:endpos]
349387
result = tregex_call_exec(pattern.exec, input_str, min(pos, endpos % len(string) + 1))
350388
if result.isMatch:
351-
return SRE_Match(self, pos, endpos, result, input_str, pattern)
389+
return Match(self, pos, endpos, result, input_str, pattern)
352390
else:
353391
return None
354392

@@ -389,7 +427,7 @@ def finditer(self, string, pos=0, endpos=-1):
389427
if not result.isMatch:
390428
break
391429
else:
392-
yield SRE_Match(self, pos, endpos, result, string, compiled_regex)
430+
yield Match(self, pos, endpos, result, string, compiled_regex)
393431
no_progress = (result.getStart(0) == result.getEnd(0))
394432
pos = result.getEnd(0) + no_progress
395433
return
@@ -411,7 +449,7 @@ def findall(self, string, pos=0, endpos=-1):
411449
elif compiled_regex.groupCount == 2:
412450
matchlist.append(self.__sanitize_out_type(string[result.getStart(1):result.getEnd(1)]))
413451
else:
414-
matchlist.append(tuple(map(self.__sanitize_out_type, SRE_Match(self, pos, endpos, result, string, compiled_regex).groups())))
452+
matchlist.append(tuple(map(self.__sanitize_out_type, Match(self, pos, endpos, result, string, compiled_regex).groups())))
415453
no_progress = (result.getStart(0) == result.getEnd(0))
416454
pos = result.getEnd(0) + no_progress
417455
return matchlist
@@ -433,11 +471,10 @@ def subn(self, repl, string, count=0):
433471
else:
434472
literal = b'\\' not in repl
435473
if not literal:
436-
import sre_parse
437-
template = sre_parse.parse_template(repl, self)
438-
439-
def repl(match):
440-
return sre_parse.expand_template(template, match)
474+
import re
475+
repl = re._subx(self, repl)
476+
if not callable(repl):
477+
literal = True
441478

442479
while (count == 0 or n < count) and pos <= len(string):
443480
match_result = tregex_call_exec(pattern.exec, string, pos)
@@ -450,7 +487,7 @@ def repl(match):
450487
if literal:
451488
result.append(repl)
452489
else:
453-
_srematch = SRE_Match(self, pos, -1, match_result, string, pattern)
490+
_srematch = Match(self, pos, -1, match_result, string, pattern)
454491
_repl = repl(_srematch)
455492
result.append(_repl)
456493
pos = end
@@ -492,8 +529,37 @@ def split(self, string, maxsplit=0):
492529
result.append(self.__sanitize_out_type(string[collect_pos:]))
493530
return result
494531

532+
def scanner(self, string, pos=0, endpos=None):
533+
return SREScanner(self, string, pos, endpos)
534+
535+
536+
class SREScanner(object):
537+
def __init__(self, pattern, string, start, end):
538+
self.pattern = pattern
539+
self._string = string
540+
self._start = start
541+
self._end = end
542+
543+
def _match_search(self, matcher):
544+
if self._start > len(self._string):
545+
return None
546+
match = matcher(self._string, self._start, self._end)
547+
if match is None:
548+
self._start += 1
549+
else:
550+
self._start = match.end()
551+
if match.start() == self._start:
552+
self._start += 1
553+
return match
554+
555+
def match(self):
556+
return self._match_search(self.pattern.match)
557+
558+
def search(self):
559+
return self._match_search(self.pattern.search)
560+
495561

496-
_t_compile = SRE_Pattern
562+
_t_compile = Pattern
497563

498564
def compile(pattern, flags, code, groups, groupindex, indexgroup):
499565
import _cpython_sre

graalpython/lib-python/3/test/test_re.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from test.support import (gc_collect, bigmemtest, _2G,
2-
cpython_only, captured_stdout)
2+
cpython_only, captured_stdout, impl_detail)
33
import locale
44
import re
55
import sre_compile
@@ -53,6 +53,7 @@ def checkTemplateError(self, pattern, repl, string, errmsg, pos=None):
5353
if pos is not None:
5454
self.assertEqual(err.pos, pos)
5555

56+
@impl_detail("buffer locking", graalvm=False)
5657
def test_keep_buffer(self):
5758
# See bug 14212
5859
b = bytearray(b'x')

0 commit comments

Comments
 (0)