Skip to content

Commit c414421

Browse files
committed
[GR-46360] Backport: Make lxml work again
PullRequest: graalpython/2795
2 parents 8d4267c + fab1336 commit c414421

File tree

10 files changed

+873
-697
lines changed

10 files changed

+873
-697
lines changed

graalpython/com.oracle.graal.python.cext/src/capi.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2085,6 +2085,22 @@ PyAPI_FUNC(void) PyTruffle_DebugTrace() {
20852085
PyAPI_FUNC(int) PyTruffle_ToNative(void* a) {
20862086
return GraalPyTruffle_ToNative(a);
20872087
}
2088+
#undef PyTruffle_Unicode_AsUTF8AndSize_CharPtr
2089+
PyAPI_FUNC(const char*) PyTruffle_Unicode_AsUTF8AndSize_CharPtr(PyObject* a) {
2090+
return GraalPyTruffle_Unicode_AsUTF8AndSize_CharPtr(a);
2091+
}
2092+
#undef PyTruffle_Unicode_AsUTF8AndSize_Size
2093+
PyAPI_FUNC(Py_ssize_t) PyTruffle_Unicode_AsUTF8AndSize_Size(PyObject* a) {
2094+
return GraalPyTruffle_Unicode_AsUTF8AndSize_Size(a);
2095+
}
2096+
#undef PyTruffle_Unicode_AsUnicodeAndSize_CharPtr
2097+
PyAPI_FUNC(Py_UNICODE*) PyTruffle_Unicode_AsUnicodeAndSize_CharPtr(PyObject* a) {
2098+
return GraalPyTruffle_Unicode_AsUnicodeAndSize_CharPtr(a);
2099+
}
2100+
#undef PyTruffle_Unicode_AsUnicodeAndSize_Size
2101+
PyAPI_FUNC(Py_ssize_t) PyTruffle_Unicode_AsUnicodeAndSize_Size(PyObject* a) {
2102+
return GraalPyTruffle_Unicode_AsUnicodeAndSize_Size(a);
2103+
}
20882104
#undef PyTuple_GetItem
20892105
PyAPI_FUNC(PyObject*) PyTuple_GetItem(PyObject* a, Py_ssize_t b) {
20902106
return GraalPyTuple_GetItem(a, b);

graalpython/com.oracle.graal.python.cext/src/capi.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,10 @@ typedef struct {
356356
BUILTIN(PyTruffle_True, PyObject*) \
357357
BUILTIN(PyTruffle_Type, PyTypeObject*, const char*) \
358358
BUILTIN(PyTruffle_Type_Modified, int, PyTypeObject*, const char*, PyObject*) \
359-
BUILTIN(PyTruffle_Unicode_AsUnicodeAndSize, PyObject*, PyObject*) \
359+
BUILTIN(PyTruffle_Unicode_AsUTF8AndSize_CharPtr, const char*, PyObject*) \
360+
BUILTIN(PyTruffle_Unicode_AsUTF8AndSize_Size, Py_ssize_t, PyObject*) \
361+
BUILTIN(PyTruffle_Unicode_AsUnicodeAndSize_CharPtr, Py_UNICODE*, PyObject*) \
362+
BUILTIN(PyTruffle_Unicode_AsUnicodeAndSize_Size, Py_ssize_t, PyObject*) \
360363
BUILTIN(PyTruffle_Unicode_AsWideChar, PyObject*, PyObject*, int) \
361364
BUILTIN(PyTruffle_Unicode_DecodeUTF32, PyObject*, void*, Py_ssize_t, const char*, int) \
362365
BUILTIN(PyTruffle_Unicode_FromFormat, PyObject*, const char*, va_list*) \

graalpython/com.oracle.graal.python.cext/src/unicodeobject.c

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -246,12 +246,11 @@ const char* PyUnicode_AsUTF8(PyObject *unicode) {
246246
}
247247

248248
const char* PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize) {
249-
PyObject *result;
250-
result = _PyUnicode_AsUTF8String(unicode, NULL);
251-
if (psize) {
252-
*psize = PyObject_Length(result);
249+
const char* charptr = GraalPyTruffle_Unicode_AsUTF8AndSize_CharPtr(unicode);
250+
if (charptr && psize) {
251+
*psize = GraalPyTruffle_Unicode_AsUTF8AndSize_Size(unicode);
253252
}
254-
return PyBytes_AsString(result);
253+
return charptr;
255254
}
256255

257256
// taken from CPython "Python/Objects/unicodeobject.c"
@@ -295,13 +294,11 @@ Py_UNICODE* PyUnicode_AsUnicode(PyObject *unicode) {
295294
}
296295

297296
Py_UNICODE* PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size) {
298-
PyObject* bytes = GraalPyTruffle_Unicode_AsWideChar(unicode, Py_UNICODE_SIZE);
299-
if (bytes != NULL) {
300-
// exclude null terminator at the end
301-
*size = PyBytes_Size(bytes) / Py_UNICODE_SIZE;
302-
return (Py_UNICODE*) PyBytes_AsString(bytes);
297+
Py_UNICODE* charptr = GraalPyTruffle_Unicode_AsUnicodeAndSize_CharPtr(unicode);
298+
if (charptr && size) {
299+
*size = GraalPyTruffle_Unicode_AsUnicodeAndSize_Size(unicode);
303300
}
304-
return NULL;
301+
return charptr;
305302
}
306303

307304
int _PyUnicode_Ready(PyObject *unicode) {

graalpython/com.oracle.graal.python.jni/src/capi_forwards.h

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -791,7 +791,10 @@ void unimplemented(const char* name) {
791791
#undef PyTruffle_True
792792
#undef PyTruffle_Type
793793
#undef PyTruffle_Type_Modified
794-
#undef PyTruffle_Unicode_AsUnicodeAndSize
794+
#undef PyTruffle_Unicode_AsUTF8AndSize_CharPtr
795+
#undef PyTruffle_Unicode_AsUTF8AndSize_Size
796+
#undef PyTruffle_Unicode_AsUnicodeAndSize_CharPtr
797+
#undef PyTruffle_Unicode_AsUnicodeAndSize_Size
795798
#undef PyTruffle_Unicode_AsWideChar
796799
#undef PyTruffle_Unicode_DecodeUTF32
797800
#undef PyTruffle_Unicode_FromFormat
@@ -3940,6 +3943,22 @@ PyAPI_FUNC(int) PyTruffle_ToNative(void* a) {
39403943
int result = (int) GraalPyTruffle_ToNative(a);
39413944
return result;
39423945
}
3946+
PyAPI_FUNC(const char*) PyTruffle_Unicode_AsUTF8AndSize_CharPtr(PyObject* a) {
3947+
const char* result = (const char*) GraalPyTruffle_Unicode_AsUTF8AndSize_CharPtr(a);
3948+
return result;
3949+
}
3950+
PyAPI_FUNC(Py_ssize_t) PyTruffle_Unicode_AsUTF8AndSize_Size(PyObject* a) {
3951+
Py_ssize_t result = (Py_ssize_t) GraalPyTruffle_Unicode_AsUTF8AndSize_Size(a);
3952+
return result;
3953+
}
3954+
PyAPI_FUNC(Py_UNICODE*) PyTruffle_Unicode_AsUnicodeAndSize_CharPtr(PyObject* a) {
3955+
Py_UNICODE* result = (Py_UNICODE*) GraalPyTruffle_Unicode_AsUnicodeAndSize_CharPtr(a);
3956+
return result;
3957+
}
3958+
PyAPI_FUNC(Py_ssize_t) PyTruffle_Unicode_AsUnicodeAndSize_Size(PyObject* a) {
3959+
Py_ssize_t result = (Py_ssize_t) GraalPyTruffle_Unicode_AsUnicodeAndSize_Size(a);
3960+
return result;
3961+
}
39433962
PyAPI_FUNC(PyObject*) PyTuple_GetItem(PyObject* a, Py_ssize_t b) {
39443963
PyObject* result = (PyObject*) GraalPyTuple_GetItem(a, b);
39453964
return result;

graalpython/com.oracle.graal.python.test/src/tests/cpyext/test_unicode.py

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
22
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
33
#
44
# The Universal Permissive License (UPL), Version 1.0
@@ -37,10 +37,10 @@
3737
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3838
# SOFTWARE.
3939

40-
import sys
4140
import re
41+
import sys
4242

43-
from . import CPyExtTestCase, CPyExtFunction, unhandled_error_compare, GRAALPYTHON
43+
from . import CPyExtTestCase, CPyExtFunction, unhandled_error_compare, GRAALPYTHON, CPyExtFunctionOutVars
4444

4545
__dir__ = __file__.rpartition("/")[0]
4646

@@ -99,9 +99,10 @@ def _reference_contains(args):
9999
raise TypeError
100100
return args[1] in args[0]
101101

102+
102103
def _reference_compare(args):
103104
if not isinstance(args[0], str) or not isinstance(args[1], str):
104-
raise TypeError
105+
raise TypeError
105106

106107
if args[0] == args[1]:
107108
return 0
@@ -110,24 +111,29 @@ def _reference_compare(args):
110111
else:
111112
return 1
112113

114+
113115
def _reference_as_encoded_string(args):
114116
if not isinstance(args[0], str):
115-
raise TypeError
117+
raise TypeError
116118

117119
s = args[0]
118120
encoding = args[1]
119121
errors = args[2]
120122
return s.encode(encoding, errors)
121123

124+
122125
_codecs_module = None
126+
127+
123128
def _reference_as_unicode_escape_string(args):
124129
if not isinstance(args[0], str):
125-
raise TypeError
130+
raise TypeError
126131
global _codecs_module
127132
if not _codecs_module:
128133
import _codecs as _codecs_module
129134
return _codecs_module.unicode_escape_encode(args[0])[0]
130135

136+
131137
def _reference_tailmatch(args):
132138
if not isinstance(args[0], str) or not isinstance(args[1], str):
133139
raise TypeError
@@ -141,6 +147,7 @@ def _reference_tailmatch(args):
141147
return 1 if s[start:end].endswith(substr) else 0
142148
return 1 if s[start:end].startswith(substr) else 0
143149

150+
144151
class CustomString(str):
145152
pass
146153

@@ -336,7 +343,7 @@ def compile_module(self, name):
336343
_reference_fromformat,
337344
lambda: (
338345
("word0: %s; word1: %s; int: %d; long long: %lld", "hello", "world", 1234, 1234),
339-
("word0: %s; word1: %s; int: %d; long long: %lld", "hello", "world", 1234, (1<<44)+123),
346+
("word0: %s; word1: %s; int: %d; long long: %lld", "hello", "world", 1234, (1 << 44) + 123),
340347
),
341348
code="typedef long long longlong_t;",
342349
resultspec="O",
@@ -461,6 +468,20 @@ def compile_module(self, name):
461468
cmpfunc=unhandled_error_compare
462469
)
463470

471+
test_PyUnicode_AsUTF8AndSize = CPyExtFunctionOutVars(
472+
lambda args: (s := args[0].encode("utf-8"), len(s)),
473+
lambda: (
474+
("hello",),
475+
("hellö",),
476+
),
477+
resultspec="yn",
478+
resulttype='const char*',
479+
argspec='O',
480+
arguments=["PyObject* s"],
481+
resultvars=["Py_ssize_t size"],
482+
cmpfunc=unhandled_error_compare
483+
)
484+
464485
test_PyUnicode_DecodeUTF32 = CPyExtFunction(
465486
lambda args: args[1],
466487
lambda: (
@@ -487,7 +508,7 @@ def compile_module(self, name):
487508
test_PyUnicode_DecodeUTF8Stateful = CPyExtFunction(
488509
lambda args: args[0],
489510
lambda: (
490-
("_type_", ),
511+
("_type_",),
491512
),
492513
code="""PyObject* wrap_PyUnicode_DecodeUTF8Stateful(PyObject* _type_str) {
493514
_Py_IDENTIFIER(_type_);
@@ -569,7 +590,8 @@ def compile_module(self, name):
569590
test_PyUnicode_AsUnicode = CPyExtFunction(
570591
lambda args: True,
571592
lambda: (
572-
("hello", b'\x68\x00\x65\x00\x6c\x00\x6c\x00\x6f\x00', b"\x68\x00\x00\x00\x65\x00\x00\x00\x6c\x00\x00\x00\x6c\x00\x00\x00\x6f\x00\x00\x00"),
593+
("hello", b'\x68\x00\x65\x00\x6c\x00\x6c\x00\x6f\x00',
594+
b"\x68\x00\x00\x00\x65\x00\x00\x00\x6c\x00\x00\x00\x6c\x00\x00\x00\x6f\x00\x00\x00"),
573595
),
574596
code=""" PyObject* wrap_PyUnicode_AsUnicode(PyObject* unicodeObj, PyObject* expected_16, PyObject* expected_32) {
575597
Py_ssize_t n = Py_UNICODE_SIZE == 2 ? PyBytes_Size(expected_16) : PyBytes_Size(expected_32);
@@ -595,7 +617,8 @@ def compile_module(self, name):
595617
test_PyUnicode_AsUnicodeAndSize = CPyExtFunction(
596618
lambda args: True,
597619
lambda: (
598-
("hello", b'\x68\x00\x65\x00\x6c\x00\x6c\x00\x6f\x00', b"\x68\x00\x00\x00\x65\x00\x00\x00\x6c\x00\x00\x00\x6c\x00\x00\x00\x6f\x00\x00\x00"),
620+
("hello", b'\x68\x00\x65\x00\x6c\x00\x6c\x00\x6f\x00',
621+
b"\x68\x00\x00\x00\x65\x00\x00\x00\x6c\x00\x00\x00\x6c\x00\x00\x00\x6f\x00\x00\x00"),
599622
),
600623
code=""" PyObject* wrap_PyUnicode_AsUnicodeAndSize(PyObject* unicodeObj, PyObject* expected_16, PyObject* expected_32) {
601624
Py_ssize_t n = Py_UNICODE_SIZE == 2 ? PyBytes_Size(expected_16) : PyBytes_Size(expected_32);
@@ -673,7 +696,7 @@ def compile_module(self, name):
673696
arguments=["PyObject* str", "PyObject* seq"],
674697
cmpfunc=unhandled_error_compare
675698
)
676-
699+
677700
test_PyUnicode_Compare = CPyExtFunction(
678701
_reference_compare,
679702
lambda: (
@@ -738,7 +761,6 @@ def compile_module(self, name):
738761
cmpfunc=unhandled_error_compare
739762
)
740763

741-
742764
test_PyUnicode_AsEncodedString = CPyExtFunction(
743765
_reference_as_encoded_string,
744766
lambda: (
@@ -760,7 +782,7 @@ def compile_module(self, name):
760782
_reference_as_unicode_escape_string,
761783
lambda: (
762784
("abcd",),
763-
("öüä",),
785+
("öüä",),
764786
(1,),
765787
),
766788
resultspec="O",
@@ -873,5 +895,3 @@ def compile_module(self, name):
873895
arguments=["PyObject* string", "PyObject* sep", "Py_ssize_t maxsplit"],
874896
cmpfunc=unhandled_error_compare
875897
)
876-
877-

0 commit comments

Comments
 (0)