Skip to content

Commit ba0b69e

Browse files
committed
[GR-21590] Update imports
PullRequest: graalpython/2666
2 parents ad00b91 + d3c76ca commit ba0b69e

File tree

3 files changed

+56
-10
lines changed

3 files changed

+56
-10
lines changed

ci.jsonnet

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{ "overlay": "23eced0a479624cb01e2b179a569d13d60b6bc6d" }
1+
{ "overlay": "c498f3758b4d1da5de7cf369c00dc4013b9215c3" }

graalpython/lib-graalpython/_sre.py

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
22
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
33
#
44
# The Universal Permissive License (UPL), Version 1.0
@@ -67,6 +67,15 @@ def _normalize_bounds(string, pos, endpos):
6767
def _is_bytes_like(object):
6868
return isinstance(object, (bytes, bytearray, memoryview, array, mmap))
6969

70+
def _getlocale():
71+
from locale import getlocale
72+
(lang, encoding) = getlocale()
73+
if lang is None and charset is None:
74+
return 'C'
75+
if lang is None:
76+
lang = 'en_US'
77+
return '.'.join((lang, encoding))
78+
7079
def _new_compile(p, flags=0):
7180
if _with_tregex and isinstance(p, (str, bytes, bytearray, memoryview, array, mmap)):
7281
return _t_compile(p, flags)
@@ -237,6 +246,7 @@ def __init__(self, pattern, flags):
237246
self.__binary = _is_bytes_like(pattern)
238247
self.pattern = pattern
239248
self.__input_flags = flags
249+
self.__locale_sensitive = self.__is_locale_sensitive(pattern, flags)
240250
flags_str = []
241251
for char, flag in FLAGS.items():
242252
if flags & flag:
@@ -290,11 +300,19 @@ def __check_input_type(self, input):
290300
raise TypeError("cannot use a bytes pattern on a string-like object")
291301

292302
def __tregex_compile(self, method="search", must_advance=False):
293-
if (method, must_advance) not in self.__compiled_regexes:
303+
if self.__locale_sensitive:
304+
key = (method, must_advance, _getlocale())
305+
else:
306+
key = (method, must_advance)
307+
if key not in self.__compiled_regexes:
294308
try:
295-
extra_options = f"PythonMethod={method},MustAdvance={'true' if must_advance else 'false'}"
309+
if self.__locale_sensitive:
310+
locale_option = ",PythonLocale=" + key[2]
311+
else:
312+
locale_option = ""
313+
extra_options = f"PythonMethod={method},MustAdvance={'true' if must_advance else 'false'}{locale_option}"
296314
compiled_regex = tregex_compile_internal(self.pattern, self.__flags_str, extra_options)
297-
self.__compiled_regexes[(method, must_advance)] = compiled_regex
315+
self.__compiled_regexes[key] = compiled_regex
298316
except ValueError as e:
299317
if len(e.args) == 2:
300318
msg = e.args[0]
@@ -307,7 +325,35 @@ def __tregex_compile(self, method="search", must_advance=False):
307325
raise ValueError(msg) from None
308326
raise error(msg, self.pattern, e.args[1]) from None
309327
raise
310-
return self.__compiled_regexes[(method, must_advance)]
328+
return self.__compiled_regexes[key]
329+
330+
def __is_locale_sensitive(self, pattern, flags):
    """Tests whether the regex is locale-sensitive.

    Returns `False` for patterns that are not bytes-like. Otherwise returns `True` when
    `FLAG_LOCALE` is set in `flags` or when an unescaped `(?` group opener is followed by a
    run of inline flag letters that contains `L`.

    It is not completely precise. In some instances, it will return `True` even though the
    regex is *not* locale-sensitive. This is the case when sequences resembling inline flags
    appear in character classes or comments.
    """
    if not _is_bytes_like(pattern):
        return False
    if flags & FLAG_LOCALE != 0:
        return True
    # `_is_bytes_like` also accepts memoryview, array and mmap objects, which have no
    # `decode` method, so take a bytes snapshot first. LATIN-1 maps each byte to one character.
    pattern = bytes(pattern).decode(encoding='LATIN-1')
    position = 0
    while position < len(pattern):
        position = pattern.find('(?', position)
        if position == -1:
            break
        # walk left over the run of backslashes directly preceding '(?'
        backslash_position = position - 1
        while backslash_position >= 0 and pattern[backslash_position] == '\\':
            backslash_position = backslash_position - 1
        # jump over '(?'
        position = position + 2
        if (position - backslash_position) % 2 == 0:
            # found odd number of backslashes, the parenthesis is a literal
            continue
        # scan the inline flag letters following '(?'
        while position < len(pattern) and pattern[position] in 'aiLmsux':
            if pattern[position] == 'L':
                return True
            position = position + 1
    return False
311357

312358
def __fallback_compile(self):
313359
if self.__compiled_fallback is None:

mx.graalpython/suite.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,31 +44,31 @@
4444
},
4545
{
4646
"name": "sdk",
47-
"version": "754fbaf4c0da8c422871721fd80c218f223220e6",
47+
"version": "39e7121bc091302c6e71c2df67a35e5eb3415ef8",
4848
"subdir": True,
4949
"urls": [
5050
{"url": "https://github.com/oracle/graal", "kind": "git"},
5151
]
5252
},
5353
{
5454
"name": "tools",
55-
"version": "754fbaf4c0da8c422871721fd80c218f223220e6",
55+
"version": "39e7121bc091302c6e71c2df67a35e5eb3415ef8",
5656
"subdir": True,
5757
"urls": [
5858
{"url": "https://github.com/oracle/graal", "kind": "git"},
5959
],
6060
},
6161
{
6262
"name": "sulong",
63-
"version": "754fbaf4c0da8c422871721fd80c218f223220e6",
63+
"version": "39e7121bc091302c6e71c2df67a35e5eb3415ef8",
6464
"subdir": True,
6565
"urls": [
6666
{"url": "https://github.com/oracle/graal", "kind": "git"},
6767
]
6868
},
6969
{
7070
"name": "regex",
71-
"version": "754fbaf4c0da8c422871721fd80c218f223220e6",
71+
"version": "39e7121bc091302c6e71c2df67a35e5eb3415ef8",
7272
"subdir": True,
7373
"urls": [
7474
{"url": "https://github.com/oracle/graal", "kind": "git"},

0 commit comments

Comments
 (0)