Skip to content

Commit 2abe17a

Browse files
committed
Unicode escape codecs are based on latin-1
1 parent df3979b commit 2abe17a

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/CodecsModuleBuiltins.java

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -311,7 +311,7 @@ Object encode(VirtualFrame frame, Object bytes, @SuppressWarnings("unused") Stri
311311

312312
@TruffleBoundary
313313
private static String strFromBytes(byte[] execute) {
314-
return new String(execute);
314+
return new String(execute, StandardCharsets.ISO_8859_1);
315315
}
316316
}
317317

@@ -601,7 +601,9 @@ String decodeBytes(ByteBuffer bytes, String errors) {
601601
throw new CharacterCodingException();
602602
}
603603
} else {
604-
val = b;
604+
// Bytes that are not an escape sequence are latin-1, which maps to unicode
605+
// codepoints directly
606+
val = b & 0xFF;
605607
}
606608
buf.putInt(val);
607609
}

0 commit comments

Comments
 (0)