Skip to content

Commit aa28502

Browse files
committed
Refactor unicode node 'asWideChar'.
1 parent 54d7431 commit aa28502

File tree

3 files changed

+110
-35
lines changed

3 files changed

+110
-35
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/TruffleCextBuiltins.java

Lines changed: 10 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
import com.oracle.graal.python.builtins.objects.cpyobject.PythonNativeClass;
6868
import com.oracle.graal.python.builtins.objects.cpyobject.PythonNativeObject;
6969
import com.oracle.graal.python.builtins.objects.cpyobject.PythonObjectNativeWrapper;
70+
import com.oracle.graal.python.builtins.objects.cpyobject.UnicodeObjectNodes.UnicodeAsWideCharNode;
7071
import com.oracle.graal.python.builtins.objects.exception.PBaseException;
7172
import com.oracle.graal.python.builtins.objects.floats.PFloat;
7273
import com.oracle.graal.python.builtins.objects.function.Arity;
@@ -1035,9 +1036,8 @@ Object doUnicode(TruffleObject o, String errors, int byteorder, Object errorMark
10351036

10361037
@Builtin(name = "PyTruffle_Unicode_AsWideChar", fixedNumOfArguments = 4)
10371038
@GenerateNodeFactory
1038-
public abstract static class PyTruffle_Unicode_AsWideChar extends NativeUnicodeBuiltin {
1039-
1040-
public abstract Object execute(Object obj, long elementSize, long elements, Object errorMarker);
1039+
abstract static class PyTruffle_Unicode_AsWideChar extends NativeUnicodeBuiltin {
1040+
@Child private UnicodeAsWideCharNode asWideCharNode;
10411041

10421042
@Specialization
10431043
Object doUnicode(PString s, long elementSize, long elements, Object errorMarker) {
@@ -1048,25 +1048,14 @@ Object doUnicode(PString s, long elementSize, long elements, Object errorMarker)
10481048
@TruffleBoundary
10491049
Object doUnicode(String s, long elementSize, long elements, Object errorMarker) {
10501050
try {
1051-
// use native byte order
1052-
Charset utf32Charset = getUTF32Charset(0);
1051+
if (asWideCharNode == null) {
1052+
CompilerDirectives.transferToInterpreterAndInvalidate();
1053+
asWideCharNode = insert(UnicodeAsWideCharNode.create());
1054+
}
10531055

1054-
// elementSize == 2: Store String in 'wchar_t' of size == 2, i.e., use UCS2. This is
1055-
// achieved by decoding to UTF32 (which is basically UCS4) and ignoring the two
1056-
// MSBs.
1057-
if (elementSize == 2L) {
1058-
ByteBuffer bytes = ByteBuffer.wrap(s.getBytes(utf32Charset));
1059-
// FIXME unsafe narrowing
1060-
ByteBuffer buf = ByteBuffer.allocate(Math.min(bytes.remaining() / 2, (int) (elements * elementSize)));
1061-
while (bytes.remaining() >= 4) {
1062-
buf.putChar((char) (bytes.getInt() & 0x0000FFFF));
1063-
}
1064-
buf.flip();
1065-
byte[] barr = new byte[buf.remaining()];
1066-
buf.get(barr);
1067-
return factory().createBytes(barr);
1068-
} else if (elementSize == 4L) {
1069-
return factory().createBytes(s.getBytes(utf32Charset));
1056+
PBytes wchars = asWideCharNode.execute(s, elementSize, elements);
1057+
if (wchars != null) {
1058+
return wchars;
10701059
} else {
10711060
return raiseNative(errorMarker, PythonErrorType.ValueError, "unsupported wchar size; was: %d", elementSize);
10721061
}
@@ -1075,10 +1064,6 @@ Object doUnicode(String s, long elementSize, long elements, Object errorMarker)
10751064
return raiseNative(errorMarker, PythonErrorType.LookupError, e.getMessage());
10761065
}
10771066
}
1078-
1079-
public static PyTruffle_Unicode_AsWideChar create() {
1080-
return TruffleCextBuiltinsFactory.PyTruffle_Unicode_AsWideCharFactory.create(null);
1081-
}
10821067
}
10831068

10841069
@Builtin(name = "PyTruffle_Bytes_AsString", fixedNumOfArguments = 2)

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cpyobject/PythonObjectNativeWrapperMR.java

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,14 @@
4040

4141
import java.util.Arrays;
4242

43-
import com.oracle.graal.python.builtins.modules.TruffleCextBuiltins.PyTruffle_Unicode_AsWideChar;
4443
import com.oracle.graal.python.builtins.modules.TruffleCextBuiltins.ToSulongNode;
4544
import com.oracle.graal.python.builtins.objects.PNone;
4645
import com.oracle.graal.python.builtins.objects.PythonAbstractObject;
4746
import com.oracle.graal.python.builtins.objects.bytes.PBytes;
4847
import com.oracle.graal.python.builtins.objects.cpyobject.PythonObjectNativeWrapperMRFactory.ReadNativeMemberNodeGen;
4948
import com.oracle.graal.python.builtins.objects.cpyobject.PythonObjectNativeWrapperMRFactory.ToPyObjectNodeGen;
5049
import com.oracle.graal.python.builtins.objects.cpyobject.PythonObjectNativeWrapperMRFactory.WriteNativeMemberNodeGen;
50+
import com.oracle.graal.python.builtins.objects.cpyobject.UnicodeObjectNodes.UnicodeAsWideCharNode;
5151
import com.oracle.graal.python.builtins.objects.dict.PDict;
5252
import com.oracle.graal.python.builtins.objects.object.PythonObject;
5353
import com.oracle.graal.python.builtins.objects.str.PString;
@@ -192,20 +192,16 @@ Object doObItem(PSequence object, @SuppressWarnings("unused") String key) {
192192

193193
@Specialization(guards = "eq(UNICODE_WSTR, key)")
194194
Object doWstr(String object, @SuppressWarnings("unused") String key,
195-
@Cached("create()") PyTruffle_Unicode_AsWideChar asWideCharNode) {
196-
return new PySequenceArrayWrapper(asWideCharNode.execute(object, sizeofWchar(), object.length(), null));
195+
@Cached("create()") UnicodeAsWideCharNode asWideCharNode) {
196+
return new PySequenceArrayWrapper(asWideCharNode.execute(object, sizeofWchar(), object.length()));
197197
}
198198

199199
@Specialization(guards = "eq(UNICODE_WSTR_LENGTH, key)")
200200
long doWstrLength(String object, @SuppressWarnings("unused") String key,
201-
@Cached("create()") PyTruffle_Unicode_AsWideChar asWideCharNode) {
202-
// TODO refactor 'PyTruffle_Unicode_AsWideChar'
201+
@Cached("create()") UnicodeAsWideCharNode asWideCharNode) {
203202
long sizeofWchar = sizeofWchar();
204-
Object result = asWideCharNode.execute(object, sizeofWchar, object.length(), null);
205-
if (result instanceof PBytes) {
206-
return ((PBytes) result).len() / sizeofWchar;
207-
}
208-
return -1;
203+
PBytes result = asWideCharNode.execute(object, sizeofWchar, object.length());
204+
return result.len() / sizeofWchar;
209205
}
210206

211207
@Specialization(guards = "eq(UNICODE_STATE, key)")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
package com.oracle.graal.python.builtins.objects.cpyobject;
2+
3+
import java.nio.ByteBuffer;
4+
import java.nio.charset.Charset;
5+
6+
import com.oracle.graal.python.builtins.objects.bytes.PBytes;
7+
import com.oracle.graal.python.builtins.objects.cpyobject.UnicodeObjectNodesFactory.UnicodeAsWideCharNodeGen;
8+
import com.oracle.graal.python.builtins.objects.str.PString;
9+
import com.oracle.graal.python.nodes.PBaseNode;
10+
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
11+
import com.oracle.truffle.api.dsl.Specialization;
12+
13+
public abstract class UnicodeObjectNodes {
14+
15+
abstract static class UnicodeBaseNode extends PBaseNode {
16+
private static final int NATIVE_ORDER = 0;
17+
private static Charset UTF32;
18+
private static Charset UTF32LE;
19+
private static Charset UTF32BE;
20+
21+
protected static Charset getUTF32Charset(int byteorder) {
22+
String utf32Name = getUTF32Name(byteorder);
23+
if (byteorder == UnicodeBaseNode.NATIVE_ORDER) {
24+
if (UTF32 == null) {
25+
UTF32 = Charset.forName(utf32Name);
26+
}
27+
return UTF32;
28+
} else if (byteorder < UnicodeBaseNode.NATIVE_ORDER) {
29+
if (UTF32LE == null) {
30+
UTF32LE = Charset.forName(utf32Name);
31+
}
32+
return UTF32LE;
33+
}
34+
if (UTF32BE == null) {
35+
UTF32BE = Charset.forName(utf32Name);
36+
}
37+
return UTF32BE;
38+
}
39+
40+
protected static String getUTF32Name(int byteorder) {
41+
String csName;
42+
if (byteorder == 0) {
43+
csName = "UTF-32";
44+
} else if (byteorder < 0) {
45+
csName = "UTF-32LE";
46+
} else {
47+
csName = "UTF-32BE";
48+
}
49+
return csName;
50+
}
51+
}
52+
53+
public abstract static class UnicodeAsWideCharNode extends UnicodeBaseNode {
54+
55+
public abstract PBytes execute(Object obj, long elementSize, long elements);
56+
57+
@Specialization
58+
PBytes doUnicode(PString s, long elementSize, long elements) {
59+
return doUnicode(s.getValue(), elementSize, elements);
60+
}
61+
62+
@Specialization
63+
@TruffleBoundary
64+
PBytes doUnicode(String s, long elementSize, long elements) {
65+
// use native byte order
66+
Charset utf32Charset = getUTF32Charset(0);
67+
68+
// elementSize == 2: Store String in 'wchar_t' of size == 2, i.e., use UCS2. This is
69+
// achieved by decoding to UTF32 (which is basically UCS4) and ignoring the two
70+
// MSBs.
71+
if (elementSize == 2L) {
72+
ByteBuffer bytes = ByteBuffer.wrap(s.getBytes(utf32Charset));
73+
// FIXME unsafe narrowing
74+
ByteBuffer buf = ByteBuffer.allocate(Math.min(bytes.remaining() / 2, (int) (elements * elementSize)));
75+
while (bytes.remaining() >= 4) {
76+
buf.putChar((char) (bytes.getInt() & 0x0000FFFF));
77+
}
78+
buf.flip();
79+
byte[] barr = new byte[buf.remaining()];
80+
buf.get(barr);
81+
return factory().createBytes(barr);
82+
} else if (elementSize == 4L) {
83+
return factory().createBytes(s.getBytes(utf32Charset));
84+
} else {
85+
throw new RuntimeException("unsupported wchar size; was: " + elementSize);
86+
}
87+
}
88+
89+
public static UnicodeAsWideCharNode create() {
90+
return UnicodeAsWideCharNodeGen.create();
91+
}
92+
}
93+
94+
}

0 commit comments

Comments
 (0)