Skip to content

Commit b80df21

Browse files
committed
Implement C API function PyUnicode_Decode.
1 parent 1038306 commit b80df21

File tree

2 files changed

+56
-15
lines changed

2 files changed

+56
-15
lines changed

graalpython/com.oracle.graal.python.cext/src/unicodeobject.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -476,3 +476,11 @@ UPCALL_ID(PyUnicode_AsUnicodeEscapeString);
476476
/*
 * C API entry point PyUnicode_AsUnicodeEscapeString.
 *
 * Passes 'unicode' through native_to_java() and forwards it to the upcall
 * identified by _jls_PyUnicode_AsUnicodeEscapeString; the upcall's result is
 * returned to the caller unchanged.
 */
PyObject * PyUnicode_AsUnicodeEscapeString(PyObject *unicode) {
477477
return UPCALL_CEXT_O(_jls_PyUnicode_AsUnicodeEscapeString, native_to_java(unicode));
478478
}
479+
480+
UPCALL_ID(PyUnicode_Decode);
481+
/*
 * C API entry point PyUnicode_Decode.
 *
 * s        - pointer to the encoded bytes
 * size     - number of bytes to decode
 * encoding - codec name; NULL selects the UTF-8 path via
 *            PyUnicode_DecodeUTF8Stateful
 * errors   - error handler name; may be NULL, which the C API defines as the
 *            default ("strict") handler
 *
 * Returns whatever the selected decoder returns (the upcall result for a
 * named encoding).
 */
PyObject * PyUnicode_Decode(const char *s, Py_ssize_t size, const char *encoding, const char *errors) {
    if (encoding == NULL) {
        /* No codec requested: use the UTF-8 decoder, which accepts NULL errors itself. */
        return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL);
    }
    /*
     * Callers are allowed to pass errors == NULL. Never hand a NULL C string
     * to polyglot_from_string; substitute the default handler name so the
     * managed side always receives a proper string.
     */
    return UPCALL_CEXT_O(_jls_PyUnicode_Decode, s, size, polyglot_from_string(encoding, SRC_CS), polyglot_from_string(errors != NULL ? errors : "strict", SRC_CS));
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/PythonCextBuiltins.java

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
import java.nio.charset.CoderResult;
5858
import java.nio.charset.CodingErrorAction;
5959
import java.nio.charset.StandardCharsets;
60+
import java.nio.charset.UnsupportedCharsetException;
6061
import java.text.DecimalFormat;
6162
import java.text.ParseException;
6263
import java.text.ParsePosition;
@@ -945,6 +946,22 @@ protected static String getUTF32Name(int byteorder) {
945946
}
946947
return csName;
947948
}
949+
950+
@TruffleBoundary
951+
protected static CharBuffer allocateCharBuffer(int cap) {
952+
return CharBuffer.allocate(cap);
953+
}
954+
955+
@TruffleBoundary
956+
protected static String toString(CharBuffer cb) {
957+
return cb.toString();
958+
}
959+
960+
@TruffleBoundary
961+
protected static int remaining(ByteBuffer cb) {
962+
return cb.remaining();
963+
}
964+
948965
}
949966

950967
@Builtin(name = "TrufflePInt_AsPrimitive", minNumOfPositionalArgs = 3)
@@ -2555,21 +2572,6 @@ Object doUtf8Decode(VirtualFrame frame, Object module, Object cByteArray, String
25552572
}
25562573
}
25572574

2558-
@TruffleBoundary
2559-
private static CharBuffer allocateCharBuffer(int cap) {
2560-
return CharBuffer.allocate(cap);
2561-
}
2562-
2563-
@TruffleBoundary
2564-
private static String toString(CharBuffer cb) {
2565-
return cb.toString();
2566-
}
2567-
2568-
@TruffleBoundary
2569-
private static int remaining(ByteBuffer cb) {
2570-
return cb.remaining();
2571-
}
2572-
25732575
@TruffleBoundary
25742576
private CoderResult decodeUTF8(CharBuffer resultBuffer, ByteBuffer inputBuffer, String errors) {
25752577
CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();
@@ -2626,4 +2628,35 @@ private Number parse(String source) throws ParseException {
26262628
return DecimalFormat.getInstance().parse(source);
26272629
}
26282630
}
2631+
2632+
@Builtin(name = "PyUnicode_Decode", minNumOfPositionalArgs = 5, declaresExplicitSelf = true)
2633+
@GenerateNodeFactory
2634+
abstract static class PyUnicode_Decode extends NativeUnicodeBuiltin {
2635+
2636+
@Specialization
2637+
Object doDecode(VirtualFrame frame, Object module, Object cByteArray, long size, String encoding, String errors,
2638+
@Cached CExtNodes.ToSulongNode toSulongNode,
2639+
@Cached GetByteArrayNode getByteArrayNode,
2640+
@Cached GetNativeNullNode getNativeNullNode) {
2641+
2642+
try {
2643+
ByteBuffer inputBuffer = wrap(getByteArrayNode.execute(frame, cByteArray, size));
2644+
int n = remaining(inputBuffer);
2645+
CharBuffer resultBuffer = allocateCharBuffer(n * 4);
2646+
decode(resultBuffer, inputBuffer, encoding, errors);
2647+
return toSulongNode.execute(factory().createTuple(new Object[]{toString(resultBuffer), n - remaining(inputBuffer)}));
2648+
} catch (IllegalArgumentException e) {
2649+
return raiseNative(frame, getNativeNullNode.execute(module), PythonErrorType.LookupError, "unknown encoding: " + encoding);
2650+
} catch (InteropException e) {
2651+
return raiseNative(frame, getNativeNullNode.execute(module), PythonErrorType.TypeError, "%m", e);
2652+
}
2653+
}
2654+
2655+
@TruffleBoundary
2656+
private CoderResult decode(CharBuffer resultBuffer, ByteBuffer inputBuffer, String encoding, String errors) {
2657+
CharsetDecoder decoder = Charset.forName(encoding).newDecoder();
2658+
CodingErrorAction action = BytesBuiltins.toCodingErrorAction(errors, this);
2659+
return decoder.onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(action).decode(inputBuffer, resultBuffer, true);
2660+
}
2661+
}
26292662
}

0 commit comments

Comments
 (0)