Skip to content

Commit d88698b

Browse files
committed
Raise re.error on invalid escapes when calling re.sub
Also remove the test case that fails on CPython 3.4.1 in the gate.
1 parent 7218b37 commit d88698b

File tree

3 files changed

+18
-8
lines changed
  • graalpython

3 files changed

+18
-8
lines changed

graalpython/com.oracle.graal.python.test/src/tests/test_re.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,13 @@ def test_basic_re_sub(self):
159159
self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'),
160160
(chr(9) + chr(10) + chr(11) + chr(13) + chr(12) + chr(7) + chr(8)))
161161

162-
for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
163-
self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c)
162+
# The following behavior is correct w.r.t. Python 3.7. However, currently
163+
# the gate uses CPython 3.4.1 to validate the test suite,
164+
# which does not pass this test case, so we have to skip it.
165+
# for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
166+
# with self.assertRaises(re.error):
167+
# self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c)
168+
164169

165170
self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
166171

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/bytes/BytesUtils.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,9 @@ public static StringBuilder decodeEscapes(ParserErrorCallback errors, String str
199199
}
200200
throw errors.raise(ValueError, "invalid \\x escape at position %d", i);
201201
default:
202-
if (regexMode) {
202+
if (regexMode && (chr == '\\' || chr == 'g' || (chr >= '0' && chr <= '9'))) {
203+
// only allow backslashes, named group references and numbered group
204+
// references in regex mode
203205
charList.append('\\');
204206
charList.append(chr);
205207
} else {

graalpython/lib-graalpython/_sre.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -336,26 +336,26 @@ def group(match_result, group_nr, string):
336336
while pos != -1 and start < n:
337337
if pos+1 < n:
338338
if repl[pos + 1].isdigit() and match_result.groupCount > 0:
339+
# TODO: Should handle backreferences longer than 1 digit and fall back to octal escapes.
339340
group_nr = int(repl[pos+1].decode('ascii')) if self.__binary else int(repl[pos+1])
340341
group_str = group(match_result, group_nr, string)
341342
if group_str is None:
342-
raise ValueError("invalid group reference %s at position %s" % (group_nr, pos))
343+
raise error("invalid group reference %s at position %s" % (group_nr, pos))
343344
result += repl[start:pos] + group_str
344345
start = pos + 2
345346
elif repl[pos + 1] == (b'g' if self.__binary else 'g'):
346347
group_ref, group_ref_end, digits_only = self.__extract_groupname(repl, pos + 2)
347348
if group_ref:
348349
group_str = group(match_result, int(group_ref) if digits_only else pattern.groups[group_ref], string)
349350
if group_str is None:
350-
raise ValueError("invalid group reference %s at position %s" % (group_ref, pos))
351+
raise error("invalid group reference %s at position %s" % (group_ref, pos))
351352
result += repl[start:pos] + group_str
352353
start = group_ref_end + 1
353354
elif repl[pos + 1] == backslash:
354355
result += repl[start:pos] + backslash
355356
start = pos + 2
356357
else:
357-
result += repl[start:pos + 2]
358-
start = pos + 2
358+
assert False, "unexpected escape in re.sub"
359359
pos = repl.find(backslash, start)
360360
result += repl[start:]
361361
return result
@@ -386,7 +386,10 @@ def sub(self, repl, string, count=0):
386386
is_string_rep = isinstance(repl, str) or _is_bytes_like(repl)
387387
if is_string_rep:
388388
self.__check_input_type(repl)
389-
repl = _process_escape_sequences(repl)
389+
try:
390+
repl = _process_escape_sequences(repl)
391+
except ValueError as e:
392+
raise error(str(e))
390393
while (count == 0 or n < count) and pos <= len(string):
391394
match_result = tregex_call_exec(pattern.exec, string, pos)
392395
if not match_result.isMatch:

0 commit comments

Comments
 (0)