Skip to content

Commit 0f0e0ae

Browse files
committed
Fix encoding of Unicode characters in raw-unicode-escape
1 parent eb4bd36 commit 0f0e0ae

File tree

1 file changed

+3
-4
lines changed

1 file changed

+3
-4
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/CodecsModuleBuiltins.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,8 @@ private Object[] encodeString(String self, String errors) {
423423
try {
424424
ByteBuffer encoded = UTF32.newEncoder().onMalformedInput(errorAction).onUnmappableCharacter(errorAction).encode(CharBuffer.wrap(self));
425425
int n = encoded.remaining();
426-
ByteBuffer buf = ByteBuffer.allocate(n);
426+
// Worst case is 6 bytes ("\\uXXXX") for every java char
427+
ByteBuffer buf = ByteBuffer.allocate(self.length() * 6);
427428
assert n % Integer.BYTES == 0;
428429
int codePoints = n / Integer.BYTES;
429430

@@ -432,9 +433,7 @@ private Object[] encodeString(String self, String errors) {
432433
if (codePoint <= 0xFF) {
433434
buf.put((byte) codePoint);
434435
} else {
435-
buf.put((byte) '\\');
436-
buf.put((byte) 'u');
437-
String hexString = Integer.toHexString(codePoint);
436+
String hexString = String.format((codePoint <= 0xFFFF ? "\\u%04x" : "\\U%08x"), codePoint);
438437
for (int i = 0; i < hexString.length(); i++) {
439438
assert hexString.charAt(i) < 128;
440439
buf.put((byte) hexString.charAt(i));

0 commit comments

Comments
 (0)