Skip to content

Commit 62f9752

Browse files
author
Franziska Geiger
committed
[GR-17590] [GR-17591][GR-16846] regex problem with None values and radd problem with custom string class
PullRequest: graalpython/605
2 parents 8d27b51 + 112cafc commit 62f9752

File tree

5 files changed

+67
-8
lines changed

5 files changed

+67
-8
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ wrapper scripts and makes the implementation usable from shell as standard Pytho
2121
execute the following command in the project directory:
2222

2323
```
24-
mx graalpython -m venv <dir-to-venv>
24+
mx python -m venv <dir-to-venv>
2525
```
2626

2727
To activate the environment in your shell session call:

graalpython/com.oracle.graal.python.test/src/tests/test_re.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,15 @@ def test_json_bytes_re_compile():
8282
else:
8383
assert False, "searching a bytes-pattern in a str did not raise"
8484

85+
def test_none_value():
86+
regex_find = re.compile(
87+
r"(//?| ==?)|([[]]+)").findall
88+
stream = iter([ (special,text)
89+
for (special,text) in regex_find('[]')
90+
if special or text ])
91+
n = next(stream)
92+
assert not n[0]
93+
assert str(n[0]) == 'None'
8594

8695
class S(str):
8796
def __getitem__(self, index):
@@ -370,7 +379,7 @@ def test_getattr(self):
370379
self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
371380
# TODO at the moment, we use slightly different default flags
372381
#self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
373-
382+
374383
# TODO re-enable this test once TRegex provides this property
375384
#self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
376385
self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
@@ -390,7 +399,7 @@ def test_getattr(self):
390399
p = re.compile(r'(?i)(?P<first>a)(?P<other>b)')
391400
self.assertEqual(sorted(p.groupindex), ['first', 'other'])
392401
self.assertEqual(p.groupindex['other'], 2)
393-
402+
394403
if sys.version_info.minor >= 6:
395404
with self.assertRaises(TypeError):
396405
p.groupindex['other'] = 0
@@ -438,7 +447,15 @@ def test_escaping(self):
438447
self.assertTrue(match)
439448
assert "frac" in match.groupdict()
440449
assert match.groupdict()["frac"] == "1"
441-
442-
450+
451+
443452
def test_escape(self):
444453
self.assertEqual(re.escape(" ()"), "\\ \\(\\)")
454+
455+
def test_finditer_empty_string(self):
456+
regex = re.compile(
457+
r"(//?| ==?)|([[]]+)")
458+
for m in regex.finditer(''):
459+
self.fail()
460+
461+

graalpython/com.oracle.graal.python.test/src/tests/test_string.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,3 +1096,34 @@ def test_strip_with_sep():
10961096
assertRaises(TypeError, 'hello', 'strip', 42, 42)
10971097
assertRaises(TypeError, 'hello', 'lstrip', 42, 42)
10981098
assertRaises(TypeError, 'hello', 'rstrip', 42, 42)
1099+
1100+
class EncodedString(str):
1101+
# unicode string subclass to keep track of the original encoding.
1102+
# 'encoding' is None for unicode strings and the source encoding
1103+
# otherwise
1104+
encoding = None
1105+
1106+
def __deepcopy__(self, memo):
1107+
return self
1108+
1109+
def byteencode(self):
1110+
assert self.encoding is not None
1111+
return self.encode(self.encoding)
1112+
1113+
def utf8encode(self):
1114+
assert self.encoding is None
1115+
return self.encode("UTF-8")
1116+
1117+
@property
1118+
def is_unicode(self):
1119+
return self.encoding is None
1120+
1121+
def contains_surrogates(self):
1122+
return string_contains_surrogates(self)
1123+
1124+
def as_utf8_string(self):
1125+
return bytes_literal(self.utf8encode(), 'utf8')
1126+
1127+
def test_radd():
1128+
val = EncodedString('abc')
1129+
assert 'cde' + val == 'cdeabc'

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringBuiltins.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,10 @@ public abstract static class AddNode extends PythonBinaryBuiltinNode {
345345
protected final ConditionProfile rightProfile1 = ConditionProfile.createBinaryProfile();
346346
protected final ConditionProfile rightProfile2 = ConditionProfile.createBinaryProfile();
347347

348+
public static AddNode create() {
349+
return StringBuiltinsFactory.AddNodeFactory.create();
350+
}
351+
348352
@Specialization(guards = "!concatGuard(self, other)")
349353
String doSSSimple(String self, String other) {
350354
if (LazyString.length(self, leftProfile1, leftProfile2) == 0) {
@@ -369,7 +373,7 @@ Object doSSSimple(PString self, String other) {
369373
return self;
370374
}
371375

372-
@Specialization(guards = "!concatGuard(self.getCharSequence(), self.getCharSequence())")
376+
@Specialization(guards = "!concatGuard(self.getCharSequence(), other.getCharSequence())")
373377
PString doSSSimple(PString self, PString other) {
374378
if (LazyString.length(self.getCharSequence(), leftProfile1, leftProfile2) == 0) {
375379
return other;
@@ -447,7 +451,12 @@ protected boolean concatGuard(CharSequence left, CharSequence right) {
447451

448452
@Builtin(name = __RADD__, minNumOfPositionalArgs = 2)
449453
@GenerateNodeFactory
450-
public abstract static class RAddNode extends AddNode {
454+
public abstract static class RAddNode extends PythonBinaryBuiltinNode {
455+
@Specialization
456+
Object doAll(VirtualFrame frame, Object left, Object right,
457+
@Cached("create()") AddNode addNode) {
458+
return addNode.execute(frame, right, left);
459+
}
451460
}
452461

453462
// str.startswith(prefix[, start[, end]])

graalpython/lib-graalpython/_sre.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,12 +339,14 @@ def __sanitize_out_type(self, elem):
339339
returned list is always either 'str' or 'bytes'."""
340340
if self.__binary:
341341
return bytes(elem)
342+
elif elem is None:
343+
return None
342344
else:
343345
return str(elem)
344346

345347
def finditer(self, string, pos=0, endpos=-1):
346348
self.__check_input_type(string)
347-
if endpos > len(string):
349+
if endpos > len(string) or len(string) == 0:
348350
endpos = len(string)
349351
elif endpos < 0:
350352
endpos = endpos % len(string) + 1

0 commit comments

Comments
 (0)