Skip to content

Commit 5705add

Browse files
committed
[GR-54936][GR-51959] Support native string subclasses in more builtins
PullRequest: graalpython/3382
2 parents 4067714 + ed32f61 commit 5705add

File tree

16 files changed

+301 -88 lines changed

16 files changed

+301 -88 lines changed

graalpython/com.oracle.graal.python.cext/src/capi.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,8 @@ typedef struct {
103103
PyObject *weakreflist;
104104
} PyPickleBufferObject;
105105

106+
// defined in 'unicodeobject.c'
107+
void unicode_dealloc(PyObject *unicode);
106108

107109
static void object_dealloc(PyObject *self) {
108110
Py_TYPE(self)->tp_free(self);

graalpython/com.oracle.graal.python.cext/src/capi.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -326,7 +326,7 @@ PY_TRUFFLE_TYPE(PyStaticMethod_Type, "staticmethod", &PyType_Type, sizeof(Py
326326
PY_TRUFFLE_TYPE(PySuper_Type, "super", &PyType_Type, sizeof(superobject)) \
327327
PY_TRUFFLE_TYPE(PyTraceBack_Type, "traceback", &PyType_Type, sizeof(PyTypeObject)) \
328328
PY_TRUFFLE_TYPE_GENERIC(PyTuple_Type, "tuple", &PyType_Type, sizeof(PyTupleObject) - sizeof(PyObject *), sizeof(PyObject *), PyTruffle_Tuple_Alloc, (destructor)PyTruffle_Tuple_Dealloc, 0, 0) \
329-
PY_TRUFFLE_TYPE(PyUnicode_Type, "str", &PyType_Type, sizeof(PyUnicodeObject)) \
329+
PY_TRUFFLE_TYPE_GENERIC(PyUnicode_Type, "str", &PyType_Type, sizeof(PyUnicodeObject), 0, NULL, unicode_dealloc, PyObject_Del, 0) \
330330
/* NOTE: we use the same Python type (namely 'PBuiltinFunction') for 'wrapper_descriptor' as for 'method_descriptor'; so the flags must be the same! */ \
331331
PY_TRUFFLE_TYPE(PyWrapperDescr_Type, "wrapper_descriptor", &PyType_Type, sizeof(PyWrapperDescrObject)) \
332332
PY_TRUFFLE_TYPE(PyZip_Type, "zip", &PyType_Type, sizeof(zipobject)) \

graalpython/com.oracle.graal.python.cext/src/unicodeobject.c

Lines changed: 43 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -119,7 +119,6 @@ extern "C" {
119119
# define _PyUnicode_CHECK(op) PyUnicode_Check(op)
120120
#endif
121121

122-
#if 0 // GraalPy change
123122
#define _PyUnicode_UTF8(op) \
124123
(_PyCompactUnicodeObject_CAST(op)->utf8)
125124
#define PyUnicode_UTF8(op) \
@@ -190,7 +189,6 @@ extern "C" {
190189
((_PyUnicode_WSTR(op) && \
191190
(!PyUnicode_IS_READY(op) || \
192191
_PyUnicode_WSTR(op) != PyUnicode_DATA(op))))
193-
#endif // GraalPy change
194192

195193
/* Generic helper macro to convert characters of different types.
196194
from_type and to_type have to be valid type names, begin and end
@@ -1715,8 +1713,8 @@ _PyUnicode_Ready(PyObject *unicode)
17151713
return 0;
17161714
}
17171715

1718-
#if 0 // GraalPy change
1719-
static void
1716+
// GraalPy change: export
1717+
PyAPI_FUNC(void)
17201718
unicode_dealloc(PyObject *unicode)
17211719
{
17221720
#ifdef Py_DEBUG
@@ -1725,6 +1723,7 @@ unicode_dealloc(PyObject *unicode)
17251723
}
17261724
#endif
17271725

1726+
#if 0 // GraalPy change
17281727
switch (PyUnicode_CHECK_INTERNED(unicode)) {
17291728
case SSTATE_NOT_INTERNED:
17301729
break;
@@ -1753,6 +1752,7 @@ unicode_dealloc(PyObject *unicode)
17531752
default:
17541753
Py_UNREACHABLE();
17551754
}
1755+
#endif // GraalPy change
17561756

17571757
if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) {
17581758
PyObject_Free(_PyUnicode_WSTR(unicode));
@@ -1767,6 +1767,7 @@ unicode_dealloc(PyObject *unicode)
17671767
Py_TYPE(unicode)->tp_free(unicode);
17681768
}
17691769

1770+
#if 0 // GraalPy change
17701771
#ifdef Py_DEBUG
17711772
static int
17721773
unicode_is_singleton(PyObject *unicode)
@@ -3726,12 +3727,27 @@ static int unicode_fill_utf8(PyObject *unicode);
37263727
const char *
37273728
PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
37283729
{
3729-
// GraalPy change: different implementation
3730-
const char* charptr = GraalPyTruffle_Unicode_AsUTF8AndSize_CharPtr(unicode);
3731-
if (charptr && psize) {
3732-
*psize = GraalPyTruffle_Unicode_AsUTF8AndSize_Size(unicode);
3730+
if (!PyUnicode_Check(unicode)) {
3731+
PyErr_BadArgument();
3732+
return NULL;
3733+
}
3734+
// GraalPy change: upcall for managed objects
3735+
if (points_to_py_handle_space(unicode)) {
3736+
return GraalPyTruffleUnicode_AsUTF8AndSize(unicode, psize);
37333737
}
3734-
return charptr;
3738+
if (PyUnicode_READY(unicode) == -1)
3739+
return NULL;
3740+
3741+
if (PyUnicode_UTF8(unicode) == NULL) {
3742+
// GraalPy change: upcall
3743+
if (GraalPyTruffleUnicode_FillUtf8(unicode) == -1) {
3744+
return NULL;
3745+
}
3746+
}
3747+
3748+
if (psize)
3749+
*psize = PyUnicode_UTF8_LENGTH(unicode);
3750+
return PyUnicode_UTF8(unicode);
37353751
}
37363752

37373753
const char *
@@ -3743,12 +3759,25 @@ PyUnicode_AsUTF8(PyObject *unicode)
37433759
Py_UNICODE *
37443760
PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)
37453761
{
3746-
// GraalPy change: different implementation
3747-
Py_UNICODE* charptr = GraalPyTruffle_Unicode_AsUnicodeAndSize_CharPtr(unicode);
3748-
if (charptr && size) {
3749-
*size = GraalPyTruffle_Unicode_AsUnicodeAndSize_Size(unicode);
3762+
if (!PyUnicode_Check(unicode)) {
3763+
PyErr_BadArgument();
3764+
return NULL;
3765+
}
3766+
// GraalPy change: upcall for managed objects
3767+
if (points_to_py_handle_space(unicode)) {
3768+
return GraalPyTruffleUnicode_AsUnicodeAndSize(unicode, size);
3769+
}
3770+
Py_UNICODE *w = _PyUnicode_WSTR(unicode);
3771+
if (w == NULL) {
3772+
// GraalPy change: upcall
3773+
if (GraalPyTruffleUnicode_FillUnicode(unicode) == -1) {
3774+
return NULL;
3775+
}
3776+
w = _PyUnicode_WSTR(unicode);
37503777
}
3751-
return charptr;
3778+
if (size != NULL)
3779+
*size = PyUnicode_WSTR_LENGTH(unicode);
3780+
return w;
37523781
}
37533782

37543783
/* Deprecated APIs */
@@ -14466,24 +14495,6 @@ unicode_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
1446614495
PyAPI_FUNC(PyObject *) // GraalPy change: export for downcall
1446714496
unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
1446814497
{
14469-
// GraalPy change: temporarily define struct access macros
14470-
#define _PyUnicode_STATE(op) \
14471-
(_PyASCIIObject_CAST(op)->state)
14472-
#define _PyUnicode_DATA_ANY(op) \
14473-
(_PyUnicodeObject_CAST(op)->data.any)
14474-
#define _PyUnicode_LENGTH(op) \
14475-
(_PyASCIIObject_CAST(op)->length)
14476-
#define _PyUnicode_HASH(op) \
14477-
(_PyASCIIObject_CAST(op)->hash)
14478-
#define _PyUnicode_UTF8(op) \
14479-
(_PyCompactUnicodeObject_CAST(op)->utf8)
14480-
#define _PyUnicode_UTF8_LENGTH(op) \
14481-
(_PyCompactUnicodeObject_CAST(op)->utf8_length)
14482-
#define _PyUnicode_WSTR(op) \
14483-
(_PyASCIIObject_CAST(op)->wstr)
14484-
#define _PyUnicode_WSTR_LENGTH(op) \
14485-
(_PyCompactUnicodeObject_CAST(op)->wstr_length)
14486-
1448714498
PyObject *self;
1448814499
Py_ssize_t length, char_size;
1448914500
int share_wstr, share_utf8;
@@ -14569,15 +14580,6 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
1456914580
onError:
1457014581
Py_DECREF(self);
1457114582
return NULL;
14572-
// GraalPy change
14573-
#undef _PyUnicode_STATE
14574-
#undef _PyUnicode_DATA_ANY
14575-
#undef _PyUnicode_LENGTH
14576-
#undef _PyUnicode_HASH
14577-
#undef _PyUnicode_UTF8
14578-
#undef _PyUnicode_UTF8_LENGTH
14579-
#undef _PyUnicode_WSTR
14580-
#undef _PyUnicode_WSTR_LENGTH
1458114583
}
1458214584

1458314585
#if 0 // GraalPy change

graalpython/com.oracle.graal.python.test/src/tests/cpyext/test_unicode.py

Lines changed: 57 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,8 @@
4141
import re
4242
import sys
4343

44-
from . import CPyExtType, CPyExtTestCase, CPyExtFunction, unhandled_error_compare, GRAALPYTHON, CPyExtFunctionOutVars
44+
from . import CPyExtType, CPyExtTestCase, CPyExtFunction, unhandled_error_compare, GRAALPYTHON, CPyExtFunctionOutVars, \
45+
is_native_object
4546

4647
__dir__ = __file__.rpartition("/")[0]
4748

@@ -219,6 +220,17 @@ def gen_intern_args():
219220
return args
220221

221222

223+
UnicodeSubclass = CPyExtType(
224+
"UnicodeSubclass",
225+
'',
226+
struct_base='PyUnicodeObject base;',
227+
tp_base='&PyUnicode_Type',
228+
tp_new='0',
229+
tp_alloc='0',
230+
tp_free='0',
231+
)
232+
233+
222234
class TestPyUnicode(CPyExtTestCase):
223235

224236
test_PyUnicode_FromObject = CPyExtFunction(
@@ -229,6 +241,7 @@ class TestPyUnicode(CPyExtTestCase):
229241
(b"hello",),
230242
(Dummy(),),
231243
(str,),
244+
(UnicodeSubclass("asdf"),),
232245
),
233246
resultspec="O",
234247
argspec='O',
@@ -408,6 +421,7 @@ class TestPyUnicode(CPyExtTestCase):
408421
("hello",),
409422
("world",),
410423
("this is a longer text also cöntaining weird Ümläuts",),
424+
(UnicodeSubclass("asdf"),),
411425
),
412426
resultspec="n",
413427
argspec='O',
@@ -421,6 +435,7 @@ class TestPyUnicode(CPyExtTestCase):
421435
("hello", ", world"),
422436
("", "world"),
423437
("this is a longer text also cöntaining weird Ümläuts", ""),
438+
(UnicodeSubclass("asdf"), "gh"),
424439
),
425440
resultspec="O",
426441
argspec='OO',
@@ -477,6 +492,7 @@ class TestPyUnicode(CPyExtTestCase):
477492
lambda: (
478493
("hello",),
479494
("hellö",),
495+
(UnicodeSubclass("asdf"),),
480496
),
481497
resultspec="s",
482498
argspec='O',
@@ -489,6 +505,8 @@ class TestPyUnicode(CPyExtTestCase):
489505
lambda: (
490506
("hello",),
491507
("hellö",),
508+
(UnicodeSubclass("asdf"),),
509+
(UnicodeSubclass("žluťoučký kůň"),),
492510
),
493511
resultspec="O",
494512
argspec='O',
@@ -501,6 +519,8 @@ class TestPyUnicode(CPyExtTestCase):
501519
lambda: (
502520
("hello",),
503521
("hellö",),
522+
(UnicodeSubclass("asdf"),),
523+
(UnicodeSubclass("žluťoučký kůň"),),
504524
),
505525
resultspec="yn",
506526
resulttype='const char*',
@@ -650,6 +670,7 @@ class TestPyUnicode(CPyExtTestCase):
650670
lambda: (
651671
("hello",),
652672
("hellö",),
673+
(UnicodeSubclass("asdf"),),
653674
),
654675
resultspec="O",
655676
argspec='O',
@@ -662,6 +683,7 @@ class TestPyUnicode(CPyExtTestCase):
662683
lambda: (
663684
("hello",),
664685
("hellö",),
686+
(UnicodeSubclass("asdf"),),
665687
),
666688
resultspec="O",
667689
argspec='O',
@@ -676,6 +698,7 @@ class TestPyUnicode(CPyExtTestCase):
676698
("hellö, %s", ("wörld",)),
677699
("%s, %r", ("hello", "world")),
678700
("nothing else", tuple()),
701+
(UnicodeSubclass("%s, %r"), ("hello", "world")),
679702
),
680703
resultspec="O",
681704
argspec='OO',
@@ -691,6 +714,7 @@ class TestPyUnicode(CPyExtTestCase):
691714
(b"hello",),
692715
("hellö",),
693716
(['a', 'b', 'c'],),
717+
(UnicodeSubclass("asdf"),),
694718
),
695719
resultspec="i",
696720
argspec='O',
@@ -714,6 +738,8 @@ class TestPyUnicode(CPyExtTestCase):
714738
lambda: (
715739
("hello", b'\x68\x00\x65\x00\x6c\x00\x6c\x00\x6f\x00',
716740
b"\x68\x00\x00\x00\x65\x00\x00\x00\x6c\x00\x00\x00\x6c\x00\x00\x00\x6f\x00\x00\x00"),
741+
(UnicodeSubclass("hello"), b'\x68\x00\x65\x00\x6c\x00\x6c\x00\x6f\x00',
742+
b"\x68\x00\x00\x00\x65\x00\x00\x00\x6c\x00\x00\x00\x6c\x00\x00\x00\x6f\x00\x00\x00"),
717743
),
718744
code=""" PyObject* wrap_PyUnicode_AsUnicode(PyObject* unicodeObj, PyObject* expected_16, PyObject* expected_32) {
719745
Py_ssize_t n = Py_UNICODE_SIZE == 2 ? PyBytes_Size(expected_16) : PyBytes_Size(expected_32);
@@ -741,6 +767,8 @@ class TestPyUnicode(CPyExtTestCase):
741767
lambda: (
742768
("hello", b'\x68\x00\x65\x00\x6c\x00\x6c\x00\x6f\x00',
743769
b"\x68\x00\x00\x00\x65\x00\x00\x00\x6c\x00\x00\x00\x6c\x00\x00\x00\x6f\x00\x00\x00"),
770+
(UnicodeSubclass("hello"), b'\x68\x00\x65\x00\x6c\x00\x6c\x00\x6f\x00',
771+
b"\x68\x00\x00\x00\x65\x00\x00\x00\x6c\x00\x00\x00\x6c\x00\x00\x00\x6f\x00\x00\x00"),
744772
),
745773
code=""" PyObject* wrap_PyUnicode_AsUnicodeAndSize(PyObject* unicodeObj, PyObject* expected_16, PyObject* expected_32) {
746774
Py_ssize_t n = Py_UNICODE_SIZE == 2 ? PyBytes_Size(expected_16) : PyBytes_Size(expected_32);
@@ -798,6 +826,7 @@ class TestPyUnicode(CPyExtTestCase):
798826
("hello", 0, 1),
799827
("hello", 4, 5),
800828
("hello", 1, 4),
829+
(UnicodeSubclass("asdf"), 2, 4),
801830
),
802831
resultspec="O",
803832
argspec='Onn',
@@ -826,6 +855,7 @@ class TestPyUnicode(CPyExtTestCase):
826855
("a", "b"),
827856
("a", None),
828857
("a", 1),
858+
(UnicodeSubclass("asdf"), "asdf"),
829859
),
830860
resultspec="i",
831861
argspec='OO',
@@ -840,6 +870,7 @@ class TestPyUnicode(CPyExtTestCase):
840870
("a", "b"),
841871
("a", "ab"),
842872
("ab", "a"),
873+
(UnicodeSubclass("asdf"), "asdf"),
843874
),
844875
resultspec="i",
845876
argspec='Os',
@@ -907,6 +938,7 @@ class TestPyUnicode(CPyExtTestCase):
907938
("öüä", "ascii", "ignore"),
908939
("öüä", "ascii", "replace"),
909940
(1, "ascii", "replace"),
941+
(UnicodeSubclass("asdf"), "ascii", "report"),
910942
),
911943
resultspec="O",
912944
argspec='Oss',
@@ -920,6 +952,7 @@ class TestPyUnicode(CPyExtTestCase):
920952
("abcd",),
921953
("öüä",),
922954
(1,),
955+
(UnicodeSubclass("asdf"),),
923956
),
924957
resultspec="O",
925958
argspec='O',
@@ -991,6 +1024,7 @@ class TestPyUnicode(CPyExtTestCase):
9911024
("hello", 100),
9921025
("hello", -1),
9931026
("höllö", 4),
1027+
(UnicodeSubclass("asdf"), 1),
9941028
),
9951029
code='''PyObject* wrap_PyUnicode_ReadChar(PyObject* unicode, Py_ssize_t index) {
9961030
Py_UCS4 res = PyUnicode_ReadChar(unicode, index);
@@ -1012,6 +1046,7 @@ class TestPyUnicode(CPyExtTestCase):
10121046
lambda: (
10131047
("aaa", "bbb"),
10141048
("aaa", "a"),
1049+
(UnicodeSubclass("asdf"), "s"),
10151050
),
10161051
resultspec="i",
10171052
argspec='OO',
@@ -1024,6 +1059,7 @@ class TestPyUnicode(CPyExtTestCase):
10241059
lambda: (
10251060
("foo.bar.baz", ".", 0),
10261061
("foo.bar.baz", ".", 1),
1062+
(UnicodeSubclass("foo.bar.baz"), ".", 1),
10271063
("foo.bar.baz", 7, 0),
10281064
),
10291065
resultspec="O",
@@ -1080,6 +1116,7 @@ class TestPyUnicode(CPyExtTestCase):
10801116
("ššš",),
10811117
("すごい",),
10821118
("😂",),
1119+
(UnicodeSubclass("asdf"),)
10831120
),
10841121
code='''
10851122
PyObject* wrap_PyUnicode_DATA(PyObject* string) {
@@ -1129,3 +1166,22 @@ def test_intern(self):
11291166
s2 = b'some text'.decode('ascii')
11301167
assert tester.set_intern_str(s1) == s2
11311168
assert tester.check_is_same_str_ptr(s2)
1169+
1170+
1171+
class TestNativeUnicodeSubclass:
1172+
def test_builtins(self):
1173+
s = UnicodeSubclass("asdf")
1174+
assert is_native_object(s)
1175+
assert type(s) is UnicodeSubclass
1176+
assert len(s) == 4
1177+
assert s[1] == 's'
1178+
assert s == "asdf"
1179+
assert s + "gh" == "asdfgh"
1180+
assert s > "asc"
1181+
assert s >= "asdf"
1182+
assert s < "b"
1183+
assert s <= "asdf"
1184+
assert s[1:] == "sdf"
1185+
assert "sd" in s
1186+
assert UnicodeSubclass("<{}>").format("asdf") == "<asdf>"
1187+
assert UnicodeSubclass("<%s>") % "asdf" == "<asdf>"

0 commit comments

Comments
 (0)