Skip to content

Commit fbb5073

Browse files
committed
Redo the workaround for marshalling surrogates in strings
1 parent e0a7025 commit fbb5073

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MarshalModuleBuiltins.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,24 +1045,28 @@ private Object readObject(int type, AddRefAndReturn addRef) throws NumberFormatE
10451045

10461046
private void writeString(TruffleString v) throws IOException {
10471047
/*
1048-
* UTF-8 doesn't support surrogates, we fall back on UTF-32 if the string contains them
1048+
* Ugly workaround for GR-39571 - TruffleString UTF-8 doesn't support surrogate
1049+
* passthrough. If the string contains surrogates, we write -1 in place of the size as a marker, then the real size, and emit the string as UTF-32LE.
10491050
*/
10501051
Encoding encoding;
10511052
if (v.isCompatibleTo(Encoding.UTF_8)) {
10521053
encoding = Encoding.UTF_8;
1053-
writeInt(0);
10541054
} else {
1055-
encoding = Encoding.UTF_32;
1056-
writeInt(1);
1055+
encoding = Encoding.UTF_32LE;
1056+
writeInt(-1);
10571057
}
10581058
InternalByteArray ba = v.switchEncodingUncached(encoding).getInternalByteArrayUncached(encoding);
10591059
writeSize(ba.getLength());
10601060
out.write(ba.getArray(), ba.getOffset(), ba.getLength());
10611061
}
10621062

10631063
private TruffleString readString() {
1064-
Encoding encoding = readInt() == 0 ? Encoding.UTF_8 : Encoding.UTF_32;
1065-
int sz = readSize();
1064+
Encoding encoding = Encoding.UTF_8;
1065+
int sz = readInt();
1066+
if (sz < 0) {
1067+
encoding = Encoding.UTF_32LE;
1068+
sz = readSize();
1069+
}
10661070
return TruffleString.fromByteArrayUncached(readNBytes(sz), 0, sz, encoding, true).switchEncodingUncached(TS_ENCODING);
10671071
}
10681072

0 commit comments

Comments
 (0)