Skip to content

Commit 4311335

Browse files
committed
Simplify PyUnicode_AsUTF8AndSize
1 parent 77a195a commit 4311335

File tree

3 files changed

+48
-37
lines changed

3 files changed

+48
-37
lines changed

graalpython/com.oracle.graal.python.cext/src/unicodeobject.c

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3722,18 +3722,28 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
37223722

37233723

37243724
static int unicode_fill_utf8(PyObject *unicode);
3725-
#endif // GraalPy change
37263725

37273726
const char *
37283727
PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
37293728
{
3730-
// GraalPy change: different implementation
3731-
const char* charptr = GraalPyTruffle_Unicode_AsUTF8AndSize_CharPtr(unicode);
3732-
if (charptr && psize) {
3733-
*psize = GraalPyTruffle_Unicode_AsUTF8AndSize_Size(unicode);
3729+
if (!PyUnicode_Check(unicode)) {
3730+
PyErr_BadArgument();
3731+
return NULL;
37343732
}
3735-
return charptr;
3733+
if (PyUnicode_READY(unicode) == -1)
3734+
return NULL;
3735+
3736+
if (PyUnicode_UTF8(unicode) == NULL) {
3737+
if (unicode_fill_utf8(unicode) == -1) {
3738+
return NULL;
3739+
}
3740+
}
3741+
3742+
if (psize)
3743+
*psize = PyUnicode_UTF8_LENGTH(unicode);
3744+
return PyUnicode_UTF8(unicode);
37363745
}
3746+
#endif // GraalPy change
37373747

37383748
const char *
37393749
PyUnicode_AsUTF8(PyObject *unicode)

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextUnicodeBuiltins.java

Lines changed: 32 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.ConstCharPtr;
5959
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.ConstCharPtrAsTruffleString;
6060
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.Int;
61+
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PY_SSIZE_T_PTR;
6162
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PY_UCS4;
6263
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.PY_UNICODE_PTR;
6364
import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.ArgDescriptor.Pointer;
@@ -1021,68 +1022,69 @@ public static _PyUnicode_AsUTF8String create() {
10211022
}
10221023
}
10231024

1024-
@CApiBuiltin(ret = ConstCharPtr, args = {PyObject}, call = Direct)
1025-
abstract static class PyTruffle_Unicode_AsUTF8AndSize_CharPtr extends CApiUnaryBuiltinNode {
1025+
@CApiBuiltin(ret = ConstCharPtr, args = {PyObject, PY_SSIZE_T_PTR}, call = Direct)
1026+
abstract static class PyUnicode_AsUTF8AndSize extends CApiBinaryBuiltinNode {
10261027

10271028
@Specialization
1028-
static Object doUnicode(PString s,
1029+
static Object doUnicode(PString s, Object sizePtr,
10291030
@Bind("this") Node inliningTarget,
1030-
@Cached InlinedConditionProfile profile,
1031+
@Shared @CachedLibrary(limit = "2") InteropLibrary lib,
1032+
@Shared @Cached InlinedConditionProfile hasSizeProfile,
1033+
@Shared @Cached InlinedConditionProfile hasUtf8Profile,
1034+
@Shared @Cached CStructAccess.WriteLongNode writeLongNode,
10311035
@Shared @Cached _PyUnicode_AsUTF8String asUTF8String) {
1032-
if (profile.profile(inliningTarget, s.getUtf8Bytes() == null)) {
1036+
if (hasUtf8Profile.profile(inliningTarget, s.getUtf8Bytes() == null)) {
10331037
PBytes bytes = (PBytes) asUTF8String.execute(s, T_STRICT);
10341038
s.setUtf8Bytes(bytes);
10351039
}
1040+
if (hasSizeProfile.profile(inliningTarget, !lib.isNull(sizePtr))) {
1041+
writeLongNode.write(sizePtr, s.getUtf8Bytes().getSequenceStorage().length());
1042+
}
10361043
return PySequenceArrayWrapper.ensureNativeSequence(s.getUtf8Bytes());
10371044
}
10381045

10391046
@Specialization
1040-
static Object doNative(PythonAbstractNativeObject s,
1041-
@CachedLibrary(limit = "2") InteropLibrary lib,
1047+
static Object doNative(PythonAbstractNativeObject s, Object sizePtr,
1048+
@Bind("this") Node inliningTarget,
1049+
@Shared @CachedLibrary(limit = "2") InteropLibrary lib,
1050+
@Shared @Cached InlinedConditionProfile hasSizeProfile,
1051+
@Shared @Cached InlinedConditionProfile hasUtf8Profile,
1052+
@Shared @Cached CStructAccess.WriteLongNode writeLongNode,
1053+
@Shared @Cached _PyUnicode_AsUTF8String asUTF8String,
10421054
@CachedLibrary(limit = "1") PythonBufferAccessLibrary bufferLib,
10431055
@Cached CStructAccess.ReadPointerNode readPointerNode,
10441056
@Cached CStructAccess.WritePointerNode writePointerNode,
10451057
@Cached CStructAccess.AllocateNode allocateNode,
10461058
@Cached CStructAccess.WriteByteNode writeByteNode,
1047-
@Cached CStructAccess.WriteLongNode writeLongNode,
1048-
@Shared @Cached _PyUnicode_AsUTF8String asUTF8String) {
1059+
@Cached CStructAccess.ReadI64Node readI64Node) {
10491060
Object utf8 = readPointerNode.readFromObj(s, CFields.PyCompactUnicodeObject__utf8);
1050-
if (lib.isNull(utf8)) {
1061+
if (hasUtf8Profile.profile(inliningTarget, lib.isNull(utf8))) {
10511062
PBytes bytes = (PBytes) asUTF8String.execute(s, T_STRICT);
10521063
int len = bufferLib.getBufferLength(bytes);
10531064
Object mem = allocateNode.alloc(len + 1, true);
1054-
writeByteNode.writeByteArray(mem, bufferLib.getInternalByteArray(bytes), len, 0, 0);
1065+
writeByteNode.writeByteArray(mem, bufferLib.getInternalOrCopiedByteArray(bytes), len, 0, 0);
10551066
writePointerNode.writeToObj(s, CFields.PyCompactUnicodeObject__utf8, mem);
10561067
writeLongNode.writeToObject(s, CFields.PyCompactUnicodeObject__utf8_length, len);
1068+
if (hasSizeProfile.profile(inliningTarget, !lib.isNull(sizePtr))) {
1069+
writeLongNode.write(sizePtr, len);
1070+
}
10571071
return mem;
1072+
} else {
1073+
if (hasSizeProfile.profile(inliningTarget, !lib.isNull(sizePtr))) {
1074+
writeLongNode.write(sizePtr, readI64Node.readFromObj(s, CFields.PyCompactUnicodeObject__utf8_length));
1075+
}
1076+
return utf8;
10581077
}
1059-
return utf8;
10601078
}
10611079

10621080
@Fallback
1063-
static Object doError(@SuppressWarnings("unused") Object s,
1081+
@SuppressWarnings("unused")
1082+
static Object doError(Object s, Object sizePtr,
10641083
@Cached PRaiseNode raiseNode) {
10651084
throw raiseNode.raise(TypeError, BAD_ARG_TYPE_FOR_BUILTIN_OP);
10661085
}
10671086
}
10681087

1069-
@CApiBuiltin(ret = Py_ssize_t, args = {PyObject}, call = Direct)
1070-
abstract static class PyTruffle_Unicode_AsUTF8AndSize_Size extends CApiUnaryBuiltinNode {
1071-
1072-
@Specialization
1073-
Object doUnicode(PString s) {
1074-
// PyTruffle_Unicode_AsUTF8AndSize_CharPtr must have been called before
1075-
return s.getUtf8Bytes().getSequenceStorage().length();
1076-
}
1077-
1078-
@Specialization
1079-
Object doNative(PythonAbstractNativeObject s,
1080-
@Cached CStructAccess.ReadI64Node readI64Node) {
1081-
// PyTruffle_Unicode_AsUTF8AndSize_CharPtr must have been called before
1082-
return readI64Node.readFromObj(s, CFields.PyCompactUnicodeObject__utf8_length);
1083-
}
1084-
}
1085-
10861088
@CApiBuiltin(ret = PY_UNICODE_PTR, args = {PyObject}, call = Direct)
10871089
abstract static class PyTruffle_Unicode_AsUnicodeAndSize_CharPtr extends CApiUnaryBuiltinNode {
10881090

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CApiFunction.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,6 @@ public final class CApiFunction {
477477
@CApiBuiltin(name = "PyUnicode_AsUCS4", ret = PY_UCS4_PTR, args = {PyObject, PY_UCS4_PTR, Py_ssize_t, Int}, call = CImpl)
478478
@CApiBuiltin(name = "PyUnicode_AsUCS4Copy", ret = PY_UCS4_PTR, args = {PyObject}, call = CImpl)
479479
@CApiBuiltin(name = "PyUnicode_AsUTF8", ret = ConstCharPtrAsTruffleString, args = {PyObject}, call = CImpl)
480-
@CApiBuiltin(name = "PyUnicode_AsUTF8AndSize", ret = ConstCharPtrAsTruffleString, args = {PyObject, PY_SSIZE_T_PTR}, call = CImpl)
481480
@CApiBuiltin(name = "PyUnicode_AsUTF8String", ret = PyObject, args = {PyObject}, call = CImpl)
482481
@CApiBuiltin(name = "PyUnicode_AsUnicode", ret = PY_UNICODE_PTR, args = {PyObject}, call = CImpl)
483482
@CApiBuiltin(name = "PyUnicode_AsUnicodeAndSize", ret = PY_UNICODE_PTR, args = {PyObject, PY_SSIZE_T_PTR}, call = CImpl)

0 commit comments

Comments
 (0)