Skip to content

Commit 7c1a29f

Browse files
committed
Make logic for *unicode from wchar* independent of C API.
1 parent ed0a801 commit 7c1a29f

File tree

2 files changed: 181 additions and 160 deletions.

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/PythonCextBuiltins.java

Lines changed: 12 additions & 160 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,6 @@
4949
import java.io.PrintWriter;
5050
import java.math.BigInteger;
5151
import java.nio.ByteBuffer;
52-
import java.nio.ByteOrder;
5352
import java.nio.CharBuffer;
5453
import java.nio.charset.CharacterCodingException;
5554
import java.nio.charset.Charset;
@@ -110,8 +109,10 @@
110109
import com.oracle.graal.python.builtins.objects.cext.PythonNativeWrapperLibrary;
111110
import com.oracle.graal.python.builtins.objects.cext.UnicodeObjectNodes.UnicodeAsWideCharNode;
112111
import com.oracle.graal.python.builtins.objects.cext.common.CExtAsPythonObjectNode;
112+
import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes.Charsets;
113113
import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes.EncodeNativeStringNode;
114114
import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes.PCallCExtFunction;
115+
import com.oracle.graal.python.builtins.objects.cext.common.CExtCommonNodes.UnicodeFromWcharNode;
115116
import com.oracle.graal.python.builtins.objects.cext.common.CExtContext;
116117
import com.oracle.graal.python.builtins.objects.cext.common.CExtParseArgumentsNode;
117118
import com.oracle.graal.python.builtins.objects.cext.common.VaListWrapper;
@@ -137,7 +138,6 @@
137138
import com.oracle.graal.python.builtins.objects.object.PythonObjectLibrary;
138139
import com.oracle.graal.python.builtins.objects.set.PBaseSet;
139140
import com.oracle.graal.python.builtins.objects.str.PString;
140-
import com.oracle.graal.python.builtins.objects.str.StringNodesFactory.StringLenNodeGen;
141141
import com.oracle.graal.python.builtins.objects.traceback.PTraceback;
142142
import com.oracle.graal.python.builtins.objects.tuple.PTuple;
143143
import com.oracle.graal.python.builtins.objects.type.LazyPythonClass;
@@ -190,7 +190,6 @@
190190
import com.oracle.graal.python.runtime.ExecutionContext.IndirectCallContext;
191191
import com.oracle.graal.python.runtime.PythonContext;
192192
import com.oracle.graal.python.runtime.PythonCore;
193-
import com.oracle.graal.python.runtime.PythonOptions;
194193
import com.oracle.graal.python.runtime.exception.ExceptionUtils;
195194
import com.oracle.graal.python.runtime.exception.PException;
196195
import com.oracle.graal.python.runtime.exception.PythonErrorType;
@@ -200,7 +199,6 @@
200199
import com.oracle.graal.python.runtime.sequence.storage.MroSequenceStorage;
201200
import com.oracle.graal.python.runtime.sequence.storage.SequenceStorage;
202201
import com.oracle.truffle.api.Assumption;
203-
import com.oracle.truffle.api.CompilerAsserts;
204202
import com.oracle.truffle.api.CompilerDirectives;
205203
import com.oracle.truffle.api.CompilerDirectives.CompilationFinal;
206204
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
@@ -228,8 +226,6 @@
228226
import com.oracle.truffle.api.interop.UnsupportedMessageException;
229227
import com.oracle.truffle.api.interop.UnsupportedTypeException;
230228
import com.oracle.truffle.api.library.CachedLibrary;
231-
import com.oracle.truffle.api.nodes.ExplodeLoop;
232-
import com.oracle.truffle.api.nodes.ExplodeLoop.LoopExplosionKind;
233229
import com.oracle.truffle.api.nodes.Node;
234230
import com.oracle.truffle.api.nodes.NodeVisitor;
235231
import com.oracle.truffle.api.nodes.RootNode;
@@ -961,43 +957,6 @@ private PRaiseNativeNode ensureRaiseNativeNode() {
961957
}
962958

963959
abstract static class NativeUnicodeBuiltin extends NativeBuiltin {
964-
private static final int NATIVE_ORDER = 0;
965-
private static Charset UTF32;
966-
private static Charset UTF32LE;
967-
private static Charset UTF32BE;
968-
969-
@TruffleBoundary
970-
protected static Charset getUTF32Charset(int byteorder) {
971-
String utf32Name = getUTF32Name(byteorder);
972-
if (byteorder == NativeUnicodeBuiltin.NATIVE_ORDER) {
973-
if (UTF32 == null) {
974-
UTF32 = Charset.forName(utf32Name);
975-
}
976-
return UTF32;
977-
} else if (byteorder < NativeUnicodeBuiltin.NATIVE_ORDER) {
978-
if (UTF32LE == null) {
979-
UTF32LE = Charset.forName(utf32Name);
980-
}
981-
return UTF32LE;
982-
}
983-
if (UTF32BE == null) {
984-
UTF32BE = Charset.forName(utf32Name);
985-
}
986-
return UTF32BE;
987-
}
988-
989-
protected static String getUTF32Name(int byteorder) {
990-
String csName;
991-
if (byteorder == 0) {
992-
csName = "UTF-32";
993-
} else if (byteorder < 0) {
994-
csName = "UTF-32LE";
995-
} else {
996-
csName = "UTF-32BE";
997-
}
998-
return csName;
999-
}
1000-
1001960
@TruffleBoundary
1002961
protected static CharBuffer allocateCharBuffer(int cap) {
1003962
return CharBuffer.allocate(cap);
@@ -1012,7 +971,6 @@ protected static String toString(CharBuffer cb) {
1012971
protected static int remaining(ByteBuffer cb) {
1013972
return cb.remaining();
1014973
}
1015-
1016974
}
1017975

1018976
@Builtin(name = "TrufflePInt_AsPrimitive", minNumOfPositionalArgs = 3)
@@ -1130,109 +1088,17 @@ private PRaiseNativeNode ensureRaiseNativeNode() {
11301088

11311089
@Builtin(name = "PyTruffle_Unicode_FromWchar", minNumOfPositionalArgs = 4)
11321090
@GenerateNodeFactory
1133-
@TypeSystemReference(PythonArithmeticTypes.class)
1134-
@ImportStatic(PythonOptions.class)
11351091
abstract static class PyTruffle_Unicode_FromWchar extends NativeUnicodeBuiltin {
1136-
@Specialization(guards = "elementSize == cachedElementSize", limit = "getVariableArgumentInlineCacheLimit()")
1137-
Object doBytes(VirtualFrame frame, Object arr, long n, long elementSize, Object errorMarker,
1138-
@Cached CExtNodes.ToSulongNode toSulongNode,
1139-
@Cached("elementSize") long cachedElementSize,
1140-
@CachedLibrary("arr") InteropLibrary lib,
1141-
@CachedLibrary(limit = "1") InteropLibrary elemLib) {
1142-
try {
1143-
ByteBuffer bytes;
1144-
if (cachedElementSize == 1L || cachedElementSize == 2L || cachedElementSize == 4L) {
1145-
if (!lib.hasArrayElements(arr)) {
1146-
return raiseNative(frame, errorMarker, PythonErrorType.SystemError, "provided object is not an array", elementSize);
1147-
}
1148-
bytes = readWithSize(lib, elemLib, arr, PInt.intValueExact(n), (int) cachedElementSize);
1149-
bytes.flip();
1150-
} else {
1151-
return raiseNative(frame, errorMarker, PythonErrorType.ValueError, "unsupported 'wchar_t' size; was: %d", elementSize);
1152-
}
1153-
return toSulongNode.execute(decode(bytes));
1154-
} catch (ArithmeticException e) {
1155-
return raiseNative(frame, errorMarker, PythonErrorType.ValueError, "array size too large");
1156-
} catch (CharacterCodingException e) {
1157-
return raiseNative(frame, errorMarker, PythonErrorType.UnicodeError, "%m", e);
1158-
} catch (IllegalArgumentException e) {
1159-
return raiseNative(frame, errorMarker, PythonErrorType.LookupError, "%m", e);
1160-
} catch (InteropException e) {
1161-
return raiseNative(frame, errorMarker, PythonErrorType.TypeError, "%m", e);
1162-
} catch (IllegalElementTypeException e) {
1163-
return raiseNative(frame, errorMarker, PythonErrorType.UnicodeDecodeError, "Invalid input element type '%p'", e.elem);
1164-
}
1165-
}
1166-
1167-
@Specialization(limit = "getVariableArgumentInlineCacheLimit()")
1168-
Object doBytes(VirtualFrame frame, Object arr, PInt n, PInt elementSize, Object errorMarker,
1092+
@Specialization
1093+
static Object doBytes(VirtualFrame frame, Object arr, long n, long elementSize, Object errorMarker,
1094+
@Cached UnicodeFromWcharNode unicodeFromWcharNode,
11691095
@Cached CExtNodes.ToSulongNode toSulongNode,
1170-
@CachedLibrary("arr") InteropLibrary lib,
1171-
@CachedLibrary(limit = "1") InteropLibrary elemLib) {
1096+
@Cached TransformExceptionToNativeNode transformExceptionToNativeNode) {
11721097
try {
1173-
long es = elementSize.longValueExact();
1174-
return doBytes(frame, arr, n.longValueExact(), es, errorMarker, toSulongNode, es, lib, elemLib);
1175-
} catch (ArithmeticException e) {
1176-
return raiseNative(frame, errorMarker, PythonErrorType.ValueError, "invalid parameters");
1177-
}
1178-
}
1179-
1180-
@TruffleBoundary
1181-
private static String decode(ByteBuffer bytes) throws CharacterCodingException {
1182-
return getUTF32Charset(0).newDecoder().decode(bytes).toString();
1183-
}
1184-
1185-
private static ByteBuffer readWithSize(InteropLibrary arrLib, InteropLibrary elemLib, Object o, int size, int elementSize)
1186-
throws UnsupportedMessageException, InvalidArrayIndexException, IllegalElementTypeException {
1187-
ByteBuffer buf = allocate(size * Integer.BYTES);
1188-
for (int i = 0; i < size; i += elementSize) {
1189-
putInt(buf, readElement(arrLib, elemLib, o, i, elementSize));
1190-
}
1191-
return buf;
1192-
}
1193-
1194-
@ExplodeLoop(kind = LoopExplosionKind.FULL_EXPLODE_UNTIL_RETURN)
1195-
private static int readElement(InteropLibrary arrLib, InteropLibrary elemLib, Object arr, int i, int elementSize)
1196-
throws InvalidArrayIndexException, UnsupportedMessageException, IllegalElementTypeException {
1197-
byte[] barr = new byte[4];
1198-
CompilerAsserts.partialEvaluationConstant(elementSize);
1199-
for (int j = 0; j < elementSize; j++) {
1200-
Object elem = arrLib.readArrayElement(arr, i + j);
1201-
// The array object could be one of our wrappers (e.g. 'PySequenceArrayWrapper').
1202-
// Since the Interop library does not allow to specify how many bytes we want to
1203-
// read when we do readArrayElement, our wrappers always return long. So, we check
1204-
// for 'long' here and cast down to 'byte'.
1205-
if (elemLib.fitsInLong(elem)) {
1206-
barr[j] = (byte) elemLib.asLong(elem);
1207-
} else {
1208-
CompilerDirectives.transferToInterpreter();
1209-
throw new IllegalElementTypeException(elem);
1210-
}
1211-
}
1212-
return toInt(barr);
1213-
}
1214-
1215-
@TruffleBoundary(allowInlining = true)
1216-
private static int toInt(byte[] barr) {
1217-
return ByteBuffer.wrap(barr).order(ByteOrder.LITTLE_ENDIAN).getInt();
1218-
}
1219-
1220-
@TruffleBoundary(allowInlining = true)
1221-
private static ByteBuffer allocate(int cap) {
1222-
return ByteBuffer.allocate(cap);
1223-
}
1224-
1225-
@TruffleBoundary(allowInlining = true)
1226-
private static void putInt(ByteBuffer buf, int element) {
1227-
buf.putInt(element);
1228-
}
1229-
1230-
private static final class IllegalElementTypeException extends Exception {
1231-
private static final long serialVersionUID = 0L;
1232-
private final Object elem;
1233-
1234-
IllegalElementTypeException(Object elem) {
1235-
this.elem = elem;
1098+
return toSulongNode.execute(unicodeFromWcharNode.execute(arr, n, elementSize));
1099+
} catch (PException e) {
1100+
transformExceptionToNativeNode.execute(frame, e);
1101+
return errorMarker;
12361102
}
12371103
}
12381104
}
@@ -1288,20 +1154,6 @@ Object doUnicode(VirtualFrame frame, PString s, String errors, Object error_mark
12881154
Object doUnicode(VirtualFrame frame, @SuppressWarnings("unused") Object s, @SuppressWarnings("unused") Object errors, Object errorMarker) {
12891155
return raiseBadArgument(frame, errorMarker);
12901156
}
1291-
1292-
@TruffleBoundary(transferToInterpreterOnException = false)
1293-
private byte[] doEncode(PString s, String errors) throws CharacterCodingException {
1294-
CharsetEncoder encoder = charset.newEncoder();
1295-
CodingErrorAction action = BytesBuiltins.toCodingErrorAction(errors, this);
1296-
encoder.onMalformedInput(action).onUnmappableCharacter(action);
1297-
CharBuffer buf = CharBuffer.allocate(StringLenNodeGen.getUncached().execute(s));
1298-
buf.put(s.getValue());
1299-
buf.flip();
1300-
ByteBuffer encoded = encoder.encode(buf);
1301-
byte[] barr = new byte[encoded.remaining()];
1302-
encoded.get(barr);
1303-
return barr;
1304-
}
13051157
}
13061158

13071159
@Builtin(name = "_PyUnicode_AsUTF8String", minNumOfPositionalArgs = 3)
@@ -1365,7 +1217,7 @@ Object doUnicode(VirtualFrame frame, TruffleObject o, long size, String errors,
13651217
} catch (CharacterCodingException e) {
13661218
return raiseNative(frame, errorMarker, PythonErrorType.UnicodeEncodeError, "%m", e);
13671219
} catch (IllegalArgumentException e) {
1368-
String csName = getUTF32Name(byteorder);
1220+
String csName = Charsets.getUTF32Name(byteorder);
13691221
return raiseNative(frame, errorMarker, PythonErrorType.LookupError, "unknown encoding: " + csName);
13701222
} catch (InteropException e) {
13711223
return raiseNative(frame, errorMarker, PythonErrorType.TypeError, "%m", e);
@@ -1374,7 +1226,7 @@ Object doUnicode(VirtualFrame frame, TruffleObject o, long size, String errors,
13741226

13751227
@TruffleBoundary
13761228
private String decodeUTF32(byte[] data, int size, String errors, int byteorder) throws CharacterCodingException {
1377-
CharsetDecoder decoder = getUTF32Charset(byteorder).newDecoder();
1229+
CharsetDecoder decoder = Charsets.getUTF32Charset(byteorder).newDecoder();
13781230
CodingErrorAction action = BytesBuiltins.toCodingErrorAction(errors, this);
13791231
CharBuffer decode = decoder.onMalformedInput(action).onUnmappableCharacter(action).decode(wrap(data, 0, size));
13801232
return decode.toString();

0 commit comments

Comments
 (0)