Skip to content

Commit 48b72e7

Browse files
committed
Avoid upcall arguments conversion for unicode decoder
1 parent 66c2d7b commit 48b72e7

File tree

3 files changed

+36
-4
lines changed

3 files changed

+36
-4
lines changed

graalpython/com.oracle.graal.python.cext/src/unicodeobject.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -516,9 +516,15 @@ PyObject * PyUnicode_DecodeUTF8(const char *s, Py_ssize_t size, const char *erro
516516
return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL);
517517
}
518518

519-
UPCALL_ID(PyUnicode_DecodeUTF8Stateful);
519+
typedef PyObject* (*unicode_DecodeUTF8Stateful_fun_t)(void *data, const char *errors, int consumed);
520+
UPCALL_TYPED_ID(PyUnicode_DecodeUTF8Stateful, unicode_DecodeUTF8Stateful_fun_t);
520521
PyObject * PyUnicode_DecodeUTF8Stateful(const char *s, Py_ssize_t size, const char *errors, Py_ssize_t *consumed) {
521-
PyObject* result = UPCALL_CEXT_O(_jls_PyUnicode_DecodeUTF8Stateful, polyglot_from_i8_array(s, size), polyglot_from_string(errors, SRC_CS), consumed != NULL ? 1 : 0);
522+
// This deliberately does not use UPCALL_CEXT_O to avoid argument conversion since
523+
// 'PyUnicode_DecodeUTF8Stateful' really expects the bare pointer.
524+
PyObject* result = _jls_PyUnicode_DecodeUTF8Stateful(
525+
polyglot_from_i8_array(s, size),
526+
polyglot_from_string(errors, SRC_CS),
527+
consumed != NULL ? 1 : 0);
522528
if (result != NULL) {
523529
if (consumed != NULL) {
524530
*consumed = PyLong_AsSsize_t(PyTuple_GetItem(result, 1));
@@ -532,7 +538,10 @@ PyObject * PyUnicode_DecodeUTF8Stateful(const char *s, Py_ssize_t size, const ch
532538
// partially taken from CPython "Python/Objects/unicodeobject.c"
533539
PyObject * _PyUnicode_FromId(_Py_Identifier *id) {
534540
if (!id->object) {
535-
id->object = PyUnicode_DecodeUTF8Stateful(id->string, strlen(id->string), NULL, NULL);
541+
id->object = PyUnicode_DecodeUTF8Stateful(id->string,
542+
strlen(id->string),
543+
"strict",
544+
NULL);
536545
if (!id->object) {
537546
return NULL;
538547
}

graalpython/com.oracle.graal.python.test/src/tests/cpyext/test_unicode.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,24 @@ def compile_module(self, name):
304304
cmpfunc=unhandled_error_compare
305305
)
306306

307+
test_PyUnicode_DecodeUTF8Stateful = CPyExtFunction(
308+
lambda args: args[0],
309+
lambda: (
310+
("_type_", ),
311+
),
312+
code="""PyObject* wrap_PyUnicode_DecodeUTF8Stateful(PyObject* _type_str) {
313+
_Py_IDENTIFIER(_type_);
314+
// _PyUnicode_FromId --> PyUnicode_DecodeUTF8Stateful
315+
return _PyUnicode_FromId(&PyId__type_);
316+
}
317+
""",
318+
resultspec="O",
319+
argspec='O',
320+
arguments=["PyObject* s"],
321+
callfunction="wrap_PyUnicode_DecodeUTF8Stateful",
322+
cmpfunc=unhandled_error_compare
323+
)
324+
307325
test_PyUnicode_AsLatin1String = CPyExtFunction(
308326
lambda args: args[0].encode("iso-8859-1"),
309327
lambda: (

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/PythonCextBuiltins.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1153,7 +1153,12 @@ protected static CharBuffer allocateCharBuffer(int cap) {
11531153

11541154
@TruffleBoundary
11551155
protected static String toString(CharBuffer cb) {
1156-
return cb.toString();
1156+
int len = cb.position();
1157+
if (len > 0) {
1158+
cb.rewind();
1159+
return cb.subSequence(0, len).toString();
1160+
}
1161+
return "";
11571162
}
11581163

11591164
@TruffleBoundary

0 commit comments

Comments
 (0)