Skip to content

Commit 5416f71

Browse files
committed
Unify PyUnicode_Decode with codecs module
1 parent 9e7f957 commit 5416f71

File tree

2 files changed

+15
-26
lines changed

2 files changed

+15
-26
lines changed

graalpython/com.oracle.graal.python.cext/src/unicodeobject.c

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -568,13 +568,16 @@ PyObject * PyUnicode_AsUnicodeEscapeString(PyObject *unicode) {
568568
return UPCALL_CEXT_O(_jls_PyUnicode_AsUnicodeEscapeString, native_to_java(unicode));
569569
}
570570

571-
typedef PyObject* (*unicode_PyUnicode_Decode_fun_t)(void *data, const char *encoding, const char *errors);
572-
UPCALL_TYPED_ID(PyUnicode_Decode, unicode_PyUnicode_Decode_fun_t);
571+
UPCALL_ID(PyUnicode_Decode);
573572
PyObject * PyUnicode_Decode(const char *s, Py_ssize_t size, const char *encoding, const char *errors) {
574573
if (encoding == NULL) {
575574
return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL);
576575
}
577-
return _jls_PyUnicode_Decode(polyglot_from_i8_array(s, size), polyglot_from_string(encoding, SRC_CS), convert_errors(errors));
576+
PyObject *mv = PyMemoryView_FromMemory(s, size, PyBUF_READ);
577+
if (!mv) {
578+
return NULL;
579+
}
580+
return UPCALL_CEXT_O(_jls_PyUnicode_Decode, mv, polyglot_from_string(encoding, SRC_CS), convert_errors(errors));
578581
}
579582

580583
PyObject * PyUnicode_DecodeASCII(const char *s, Py_ssize_t size, const char *errors) {

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/PythonCextBuiltins.java

Lines changed: 9 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -2595,35 +2595,21 @@ protected static boolean isReprFormatCode(int formatCode) {
25952595

25962596
@Builtin(name = "PyUnicode_Decode", minNumOfPositionalArgs = 4, declaresExplicitSelf = true)
25972597
@GenerateNodeFactory
2598-
abstract static class PyUnicode_Decode extends NativeUnicodeBuiltin {
2598+
abstract static class PyUnicode_Decode extends NativeBuiltin {
25992599

26002600
@Specialization
2601-
Object doDecode(VirtualFrame frame, Object module, Object cByteArray, String encoding, String errors,
2602-
@Cached CExtNodes.ToSulongNode toSulongNode,
2603-
@Cached GetByteArrayNode getByteArrayNode,
2601+
Object doDecode(VirtualFrame frame, Object module, PMemoryView mv, String encoding, String errors,
2602+
@Cached CodecsModuleBuiltins.DecodeNode decodeNode,
2603+
@Cached CExtNodes.ToNewRefNode toSulongNode,
2604+
@Cached TransformExceptionToNativeNode transformExceptionToNativeNode,
26042605
@Cached GetNativeNullNode getNativeNullNode) {
2605-
26062606
try {
2607-
ByteBuffer inputBuffer = wrap(getByteArrayNode.execute(cByteArray, -1));
2608-
int n = remaining(inputBuffer);
2609-
CharBuffer resultBuffer = allocateCharBuffer(n * 4);
2610-
decode(resultBuffer, inputBuffer, encoding, errors);
2611-
return toSulongNode.execute(factory().createTuple(new Object[]{toString(resultBuffer), n - remaining(inputBuffer)}));
2612-
} catch (IllegalArgumentException e) {
2613-
return raiseNative(frame, getNativeNullNode.execute(module), PythonErrorType.LookupError, ErrorMessages.UNKNOWN_ENCODING, encoding);
2614-
} catch (InteropException e) {
2615-
return raiseNative(frame, getNativeNullNode.execute(module), PythonErrorType.TypeError, "%m", e);
2616-
} catch (OverflowException e) {
2617-
return raiseNative(frame, getNativeNullNode.execute(module), PythonErrorType.SystemError, ErrorMessages.INPUT_TOO_LONG);
2607+
return toSulongNode.execute(decodeNode.executeWithStrings(frame, mv, encoding, errors));
2608+
} catch (PException e) {
2609+
transformExceptionToNativeNode.execute(frame, e);
2610+
return toSulongNode.execute(getNativeNullNode.execute(module));
26182611
}
26192612
}
2620-
2621-
@TruffleBoundary
2622-
private void decode(CharBuffer resultBuffer, ByteBuffer inputBuffer, String encoding, String errors) {
2623-
CharsetDecoder decoder = Charset.forName(encoding).newDecoder();
2624-
CodingErrorAction action = BytesBuiltins.toCodingErrorAction(errors, this);
2625-
decoder.onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(action).decode(inputBuffer, resultBuffer, true);
2626-
}
26272613
}
26282614

26292615
@Builtin(name = "PyObject_Size", minNumOfPositionalArgs = 1)

0 commit comments

Comments
 (0)