Skip to content

Commit bfa8a4d

Browse files
committed
intrinsified python_cext PyUnicode_XXX
1 parent 2264b41 commit bfa8a4d

File tree

6 files changed

+699
-109
lines changed

6 files changed

+699
-109
lines changed

graalpython/com.oracle.graal.python.test/src/tests/cpyext/test_unicode.py

Lines changed: 95 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,56 @@ def _reference_contains(args):
9898
raise TypeError
9999
return args[1] in args[0]
100100

101+
def _reference_compare(args):
    """Compute the expected result of comparing two strings three-way.

    Args:
        args: Sequence whose first two items are the left and right operands.

    Returns:
        0 if the operands are equal, -1 if the left one sorts first, 1 otherwise.

    Raises:
        SystemError: If either operand is not a ``str`` (Python 3.6 and newer).
        TypeError: If either operand is not a ``str`` (older interpreters).
    """
    left, right = args[0], args[1]
    if not isinstance(left, str) or not isinstance(right, str):
        # Compare the whole version tuple: checking ``minor`` alone would
        # misclassify any interpreter whose major version is not 3.
        if sys.version_info >= (3, 6):
            raise SystemError
        raise TypeError

    if left == right:
        return 0
    return -1 if left < right else 1
114+
115+
def _reference_as_encoded_string(args):
    """Compute the expected bytes for encoding a string with a codec.

    Args:
        args: Sequence of ``(string, encoding, errors)``.

    Returns:
        The result of ``string.encode(encoding, errors)``.

    Raises:
        SystemError: If the first item is not a ``str`` (Python 3.6 and newer).
        TypeError: If the first item is not a ``str`` (older interpreters).
    """
    if not isinstance(args[0], str):
        # Compare the whole version tuple: checking ``minor`` alone would
        # misclassify any interpreter whose major version is not 3.
        if sys.version_info >= (3, 6):
            raise SystemError
        raise TypeError

    text, encoding, errors = args[0], args[1], args[2]
    return text.encode(encoding, errors)
126+
127+
# Cache for the ``_codecs`` module; filled in on first use by
# ``_reference_as_unicode_escape_string``.
_codecs_module = None


def _reference_as_unicode_escape_string(args):
    """Compute the expected unicode-escape encoding of a string.

    Args:
        args: Sequence whose first item is the string to encode.

    Returns:
        The bytes produced by ``_codecs.unicode_escape_encode`` for that string.

    Raises:
        SystemError: If the first item is not a ``str`` (Python 3.6 and newer).
        TypeError: If the first item is not a ``str`` (older interpreters).
    """
    global _codecs_module
    if not isinstance(args[0], str):
        # Compare the whole version tuple: checking ``minor`` alone would
        # misclassify any interpreter whose major version is not 3.
        if sys.version_info >= (3, 6):
            raise SystemError
        raise TypeError

    # Test the sentinel by identity rather than by truthiness.
    if _codecs_module is None:
        import _codecs
        _codecs_module = _codecs
    # unicode_escape_encode returns (encoded_bytes, consumed_length).
    return _codecs_module.unicode_escape_encode(args[0])[0]
138+
139+
def _reference_tailmatch(args):
    """Compute the expected result of a prefix/suffix match on a slice.

    Args:
        args: Sequence of ``(string, substring, start, end, direction)``.
            A ``direction`` greater than zero checks the suffix of
            ``string[start:end]``; any other value checks the prefix.

    Returns:
        1 if the slice matches, 0 if it does not, and -1 if either
        ``string`` or ``substring`` is not a ``str``.
    """
    if not isinstance(args[0], str) or not isinstance(args[1], str):
        return -1

    s, substr, start, end, direction = args[0], args[1], args[2], args[3], args[4]
    window = s[start:end]
    if direction > 0:
        matched = window.endswith(substr)
    else:
        matched = window.startswith(substr)
    return 1 if matched else 0
101151

102152
class CustomString(str):
103153
pass
@@ -509,6 +559,37 @@ def compile_module(self, name):
509559
arguments=["PyObject* str", "PyObject* seq"],
510560
cmpfunc=unhandled_error_compare
511561
)
562+
563+
# Test declaration for PyUnicode_Compare: expected values come from
# _reference_compare. The argument tuples cover an equal pair, an unequal
# pair, and two cases whose right operand is not a str (None and 1), which
# drive the reference function's error branch. Results are checked with
# unhandled_error_compare.
# NOTE(review): CPyExtFunction is defined outside this view; presumably it
# builds a C wrapper from argspec/arguments/resultspec -- confirm.
test_PyUnicode_Compare = CPyExtFunction(
564+
_reference_compare,
565+
lambda: (
566+
("a", "a"),
567+
("a", "b"),
568+
("a", None),
569+
("a", 1),
570+
),
571+
resultspec="i",
572+
argspec='OO',
573+
arguments=["PyObject* left", "PyObject* right"],
574+
cmpfunc=unhandled_error_compare
575+
)
576+
577+
# Test declaration for PyUnicode_Tailmatch: expected values come from
# _reference_tailmatch. The cases cover prefix (direction 0) and suffix
# (direction 1) checks, an empty slice, and two non-str substrings.
test_PyUnicode_Tailmatch = CPyExtFunction(
    _reference_tailmatch,
    lambda: (
        ("abc", "a", 0, 1, 0),
        ("abc", "a", 0, 1, 1),
        ("abc", "a", 0, 0, 1),
        ("abc", "c", 0, 1, 0),
        ("abc", "c", 0, 1, 1),
        ("abc", None, 0, 1, 1),
        ("abc", 1, 1, 0, 1),
    ),
    resultspec="i",
    # start/end are declared as Py_ssize_t below, so they need the 'n' format
    # unit; 'i' is the unit for a C int and only fits the final `direction`.
    # NOTE(review): assumes the harness hands these declared variables to
    # PyArg_ParseTuple with this argspec -- confirm against CPyExtFunction.
    argspec='OOnni',
    arguments=["PyObject* left", "PyObject* right", "Py_ssize_t start", "Py_ssize_t end", "int direction"],
    cmpfunc=unhandled_error_compare
)
512593

513594
test_PyUnicode_FromOrdinal = CPyExtFunction(
514595
lambda args: chr(args[0]),
@@ -545,21 +626,34 @@ def compile_module(self, name):
545626

546627

547628
# Test declaration for PyUnicode_AsEncodedString. In this hunk the inline
# lambda reference (the line marked 548-) is replaced by
# _reference_as_encoded_string (629+), and a case with a non-str first
# argument, (1, "ascii", "replace"), is added (637+).
# Results are checked with unhandled_error_compare.
test_PyUnicode_AsEncodedString = CPyExtFunction(
548-
lambda args: args[0].encode(args[1], args[2]),
629+
_reference_as_encoded_string,
549630
lambda: (
550631
("abcd", "ascii", "report"),
551632
("abcd", "utf8", "report"),
552633
("öüä", "ascii", "report"),
553634
("öüä", "utf8", "report"),
554635
("öüä", "ascii", "ignore"),
555636
("öüä", "ascii", "replace"),
637+
(1, "ascii", "replace"),
556638
),
557639
resultspec="O",
558640
argspec='Oss',
559641
arguments=["PyObject* str", "const char* encoding", "const char* errors"],
560642
cmpfunc=unhandled_error_compare
561643
)
562644

645+
# Test declaration for PyUnicode_AsUnicodeEscapeString: expected values come
# from _reference_as_unicode_escape_string. The cases are an ASCII string, a
# non-ASCII string, and a non-str argument (1) that drives the reference
# function's error branch. Results are checked with unhandled_error_compare.
test_PyUnicode_AsUnicodeEscapeString = CPyExtFunction(
646+
_reference_as_unicode_escape_string,
647+
lambda: (
648+
("abcd",),
649+
("öüä",),
650+
(1,),
651+
),
652+
resultspec="O",
653+
argspec='O',
654+
arguments=["PyObject* s"],
655+
cmpfunc=unhandled_error_compare
656+
)
563657

564658
# NOTE: this test assumes that Python uses UTF-8 encoding for source files
565659
test_PyUnicode_FromWideChar = CPyExtFunction(

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/CodecsModuleBuiltins.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1335,7 +1335,7 @@ Object encode(VirtualFrame frame, Object obj, Object errors,
13351335

13361336
@Builtin(name = "unicode_escape_encode", minNumOfPositionalArgs = 1, parameterNames = {"obj", "errors"})
13371337
@GenerateNodeFactory
1338-
abstract static class UnicodeEscapeEncodeNode extends PythonBinaryBuiltinNode {
1338+
public abstract static class UnicodeEscapeEncodeNode extends PythonBinaryBuiltinNode {
13391339
@Specialization
13401340
Object encode(VirtualFrame frame, Object obj, Object errors,
13411341
@Cached CodecsEncodeNode encode) {

0 commit comments

Comments
 (0)