Skip to content

Commit 056cbf2

Browse files
committed
[GR-17881] Correctly read from UCS2/4 arrays and add tests.
PullRequest: graalpython/630
2 parents 9f8c25f + d783363 commit 056cbf2

File tree

3 files changed

+102
-33
lines changed

3 files changed

+102
-33
lines changed

graalpython/com.oracle.graal.python.cext/src/unicodeobject.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -418,12 +418,20 @@ static PyObject* _PyUnicode_FromUCS1(const Py_UCS1* u, Py_ssize_t size) {
418418
return polyglot_from_string((const char *) u, "ISO-8859-1");
419419
}
420420

421+
typedef PyObject*(*PyTruffle_Unicode_FromWchar_t)(int8_t*, int64_t, int64_t, void*);
422+
421423
static PyObject* _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size) {
422-
return UPCALL_CEXT_O(_jls_PyTruffle_Unicode_FromWchar, polyglot_from_i16_array(u, size), 2, NULL);
424+
// This deliberately does not use UPCALL_CEXT_O, to avoid argument conversion, since
425+
// 'PyTruffle_Unicode_FromWchar' really expects the bare pointer.
426+
int64_t bsize = size * sizeof(Py_UCS2);
427+
return ((PyTruffle_Unicode_FromWchar_t) _jls_PyTruffle_Unicode_FromWchar)(polyglot_from_i8_array((int8_t*)u, bsize), bsize, 2, NULL);
423428
}
424429

425430
static PyObject* _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size) {
426-
return UPCALL_CEXT_O(_jls_PyTruffle_Unicode_FromWchar, polyglot_from_i32_array(u, size), 4, NULL);
431+
// This deliberately does not use UPCALL_CEXT_O, to avoid argument conversion, since
432+
// 'PyTruffle_Unicode_FromWchar' really expects the bare pointer.
433+
int64_t bsize = size * sizeof(Py_UCS4);
434+
return ((PyTruffle_Unicode_FromWchar_t) _jls_PyTruffle_Unicode_FromWchar)(polyglot_from_i8_array((int8_t*)u, bsize), bsize, 4, NULL);
427435
}
428436

429437
// taken from CPython "Python/Objects/unicodeobject.c"

graalpython/com.oracle.graal.python.test/src/tests/cpyext/test_unicode.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -482,17 +482,27 @@ def compile_module(self, name):
482482
arguments=["int ordinal"],
483483
cmpfunc=unhandled_error_compare
484484
)
485-
486485

487-
test_PyUnicode_AsUnicodeEscapeString = CPyExtFunction(
488-
_reference_unicode_escape,
486+
# NOTE: this test assumes that Python uses UTF-8 encoding for source files
487+
test_PyUnicode_FromKindAndData = CPyExtFunction(
488+
lambda args: args[3],
489489
lambda: (
490-
("abcd", ),
491-
("öüä", ),
490+
(4, bytearray([0xA2, 0x0E, 0x02, 0x00]), 1, "𠺢"),
491+
(4, bytearray([0xA2, 0x0E, 0x02, 0x00, 0x4C, 0x0F, 0x02, 0x00]), 2, "𠺢𠽌"),
492+
(2, bytearray([0x30, 0x20]), 1, "‰"),
493+
(2, bytearray([0x30, 0x20, 0x3C, 0x20]), 2, "‰‼"),
492494
),
495+
code='''PyObject* wrap_PyUnicode_FromKindAndData(int kind, Py_buffer buffer, Py_ssize_t size, PyObject* dummy) {
496+
PyObject* res;
497+
res = PyUnicode_FromKindAndData(kind, (const char *)buffer.buf, size);
498+
Py_XINCREF(res);
499+
return res;
500+
}
501+
''',
493502
resultspec="O",
494-
argspec='O',
495-
arguments=["PyObject* str"],
503+
argspec='iy*nO',
504+
arguments=["int kind", "Py_buffer buffer", "Py_ssize_t size", "PyObject* dummy"],
505+
callfunction="wrap_PyUnicode_FromKindAndData",
496506
cmpfunc=unhandled_error_compare
497507
)
498508

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/PythonCextBuiltins.java

Lines changed: 75 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import java.io.PrintWriter;
5050
import java.math.BigInteger;
5151
import java.nio.ByteBuffer;
52+
import java.nio.ByteOrder;
5253
import java.nio.CharBuffer;
5354
import java.nio.charset.CharacterCodingException;
5455
import java.nio.charset.Charset;
@@ -173,6 +174,7 @@
173174
import com.oracle.graal.python.runtime.ExecutionContext.IndirectCallContext;
174175
import com.oracle.graal.python.runtime.PythonContext;
175176
import com.oracle.graal.python.runtime.PythonCore;
177+
import com.oracle.graal.python.runtime.PythonOptions;
176178
import com.oracle.graal.python.runtime.exception.ExceptionUtils;
177179
import com.oracle.graal.python.runtime.exception.PException;
178180
import com.oracle.graal.python.runtime.exception.PythonErrorType;
@@ -206,6 +208,7 @@
206208
import com.oracle.truffle.api.interop.UnsupportedMessageException;
207209
import com.oracle.truffle.api.interop.UnsupportedTypeException;
208210
import com.oracle.truffle.api.library.CachedLibrary;
211+
import com.oracle.truffle.api.nodes.ExplodeLoop;
209212
import com.oracle.truffle.api.nodes.Node;
210213
import com.oracle.truffle.api.nodes.NodeVisitor;
211214
import com.oracle.truffle.api.nodes.RootNode;
@@ -1071,44 +1074,50 @@ private <T> T raiseNative(VirtualFrame frame, T defaultValue, PythonBuiltinClass
10711074
}
10721075
}
10731076

1074-
@Builtin(name = "PyTruffle_Unicode_FromWchar", minNumOfPositionalArgs = 3)
1077+
@Builtin(name = "PyTruffle_Unicode_FromWchar", minNumOfPositionalArgs = 4)
10751078
@GenerateNodeFactory
10761079
@TypeSystemReference(PythonArithmeticTypes.class)
1080+
@ImportStatic(PythonOptions.class)
10771081
abstract static class PyTruffle_Unicode_FromWchar extends NativeUnicodeBuiltin {
1078-
@Specialization
1079-
Object doBytes(VirtualFrame frame, Object o, long elementSize, Object errorMarker,
1080-
@Shared("getByteArrayNode") @Cached GetByteArrayNode getByteArrayNode,
1081-
@Shared("lib") @CachedLibrary(limit = "3") InteropLibrary lib) {
1082+
@Specialization(guards = "elementSize == cachedElementSize", limit = "getVariableArgumentInlineCacheLimit()")
1083+
Object doBytes(VirtualFrame frame, Object arr, long n, long elementSize, Object errorMarker,
1084+
@Cached CExtNodes.ToSulongNode toSulongNode,
1085+
@Cached("elementSize") long cachedElementSize,
1086+
@CachedLibrary("arr") InteropLibrary lib,
1087+
@CachedLibrary(limit = "1") InteropLibrary elemLib) {
10821088
try {
10831089
ByteBuffer bytes;
1084-
if (elementSize == 2L) {
1085-
if (!lib.hasArrayElements(o)) {
1090+
if (cachedElementSize == 1L || cachedElementSize == 2L || cachedElementSize == 4L) {
1091+
if (!lib.hasArrayElements(arr)) {
10861092
return raiseNative(frame, errorMarker, PythonErrorType.SystemError, "provided object is not an array", elementSize);
10871093
}
1088-
long size = lib.getArraySize(o);
1089-
bytes = readWithSize(lib, o, (int) size);
1094+
bytes = readWithSize(lib, elemLib, arr, PInt.intValueExact(n), (int) cachedElementSize);
10901095
bytes.flip();
1091-
} else if (elementSize == 4L) {
1092-
bytes = wrap(getByteArrayNode.execute(frame, o, -1));
10931096
} else {
10941097
return raiseNative(frame, errorMarker, PythonErrorType.ValueError, "unsupported 'wchar_t' size; was: %d", elementSize);
10951098
}
1096-
return decode(bytes);
1099+
return toSulongNode.execute(decode(bytes));
1100+
} catch (ArithmeticException e) {
1101+
return raiseNative(frame, errorMarker, PythonErrorType.ValueError, "array size too large");
10971102
} catch (CharacterCodingException e) {
10981103
return raiseNative(frame, errorMarker, PythonErrorType.UnicodeError, "%m", e);
10991104
} catch (IllegalArgumentException e) {
11001105
return raiseNative(frame, errorMarker, PythonErrorType.LookupError, "%m", e);
11011106
} catch (InteropException e) {
11021107
return raiseNative(frame, errorMarker, PythonErrorType.TypeError, "%m", e);
1108+
} catch (IllegalElementTypeException e) {
1109+
return raiseNative(frame, errorMarker, PythonErrorType.UnicodeDecodeError, "Invalid input element type '%p'", e.elem);
11031110
}
11041111
}
11051112

1106-
@Specialization
1107-
Object doBytes(VirtualFrame frame, Object o, PInt elementSize, Object errorMarker,
1108-
@Shared("getByteArrayNode") @Cached GetByteArrayNode getByteArrayNode,
1109-
@Shared("lib") @CachedLibrary(limit = "3") InteropLibrary lib) {
1113+
@Specialization(limit = "getVariableArgumentInlineCacheLimit()")
1114+
Object doBytes(VirtualFrame frame, Object arr, PInt n, PInt elementSize, Object errorMarker,
1115+
@Cached CExtNodes.ToSulongNode toSulongNode,
1116+
@CachedLibrary("arr") InteropLibrary lib,
1117+
@CachedLibrary(limit = "1") InteropLibrary elemLib) {
11101118
try {
1111-
return doBytes(frame, o, elementSize.longValueExact(), errorMarker, getByteArrayNode, lib);
1119+
long es = elementSize.longValueExact();
1120+
return doBytes(frame, arr, n.longValueExact(), es, errorMarker, toSulongNode, es, lib, elemLib);
11121121
} catch (ArithmeticException e) {
11131122
return raiseNative(frame, errorMarker, PythonErrorType.ValueError, "invalid parameters");
11141123
}
@@ -1119,16 +1128,58 @@ private static String decode(ByteBuffer bytes) throws CharacterCodingException {
11191128
return getUTF32Charset(0).newDecoder().decode(bytes).toString();
11201129
}
11211130

1122-
@TruffleBoundary
1123-
private static ByteBuffer readWithSize(InteropLibrary interopLib, Object o, int size) throws UnsupportedMessageException, InvalidArrayIndexException {
1124-
ByteBuffer buf = ByteBuffer.allocate(size * Integer.BYTES);
1125-
for (long i = 0; i < size; i++) {
1126-
Object elem = interopLib.readArrayElement(o, i);
1127-
assert elem instanceof Number && 0 <= ((Number) elem).intValue() && ((Number) elem).intValue() < (1 << 16);
1128-
buf.putInt(((Number) elem).intValue());
1131+
private static ByteBuffer readWithSize(InteropLibrary arrLib, InteropLibrary elemLib, Object o, int size, int elementSize)
1132+
throws UnsupportedMessageException, InvalidArrayIndexException, IllegalElementTypeException {
1133+
ByteBuffer buf = allocate(size * Integer.BYTES);
1134+
for (int i = 0; i < size; i += elementSize) {
1135+
putInt(buf, readElement(arrLib, elemLib, o, i, elementSize));
11291136
}
11301137
return buf;
11311138
}
1139+
1140+
@ExplodeLoop
1141+
private static int readElement(InteropLibrary arrLib, InteropLibrary elemLib, Object arr, int i, int elementSize)
1142+
throws InvalidArrayIndexException, UnsupportedMessageException, IllegalElementTypeException {
1143+
byte[] barr = new byte[4];
1144+
for (int j = 0; j < elementSize; j++) {
1145+
Object elem = arrLib.readArrayElement(arr, i + j);
1146+
// The array object could be one of our wrappers (e.g. 'PySequenceArrayWrapper').
1147+
// Since the Interop library does not allow to specify how many bytes we want to
1148+
// read when we do readArrayElement, our wrappers always return long. So, we check
1149+
// for 'long' here and cast down to 'byte'.
1150+
if (elemLib.fitsInLong(elem)) {
1151+
barr[j] = (byte) elemLib.asLong(elem);
1152+
} else {
1153+
CompilerDirectives.transferToInterpreter();
1154+
throw new IllegalElementTypeException(elem);
1155+
}
1156+
}
1157+
return toInt(barr);
1158+
}
1159+
1160+
@TruffleBoundary(allowInlining = true)
1161+
private static int toInt(byte[] barr) {
1162+
return ByteBuffer.wrap(barr).order(ByteOrder.LITTLE_ENDIAN).getInt();
1163+
}
1164+
1165+
@TruffleBoundary(allowInlining = true)
1166+
private static ByteBuffer allocate(int cap) {
1167+
return ByteBuffer.allocate(cap);
1168+
}
1169+
1170+
@TruffleBoundary(allowInlining = true)
1171+
private static void putInt(ByteBuffer buf, int element) {
1172+
buf.putInt(element);
1173+
}
1174+
1175+
private static final class IllegalElementTypeException extends Exception {
1176+
private static final long serialVersionUID = 0L;
1177+
private final Object elem;
1178+
1179+
IllegalElementTypeException(Object elem) {
1180+
this.elem = elem;
1181+
}
1182+
}
11321183
}
11331184

11341185
@Builtin(name = "PyTruffle_Unicode_FromUTF8", minNumOfPositionalArgs = 2)

0 commit comments

Comments
 (0)