Skip to content

Commit 2eb76ad

Browse files
committed
Fix decoding unicode characters in raw-unicode-escape
1 parent 0f0e0ae commit 2eb76ad

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/CodecsModuleBuiltins.java

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@
4545
import static com.oracle.graal.python.runtime.exception.PythonErrorType.UnicodeDecodeError;
4646
import static com.oracle.graal.python.runtime.exception.PythonErrorType.UnicodeEncodeError;
4747

48+
import java.nio.BufferUnderflowException;
4849
import java.nio.ByteBuffer;
4950
import java.nio.CharBuffer;
5051
import java.nio.charset.CharacterCodingException;
@@ -586,16 +587,18 @@ String decodeBytes(ByteBuffer bytes, String errors) {
586587
CodingErrorAction errorAction = convertCodingErrorAction(errors);
587588
try {
588589
ByteBuffer buf = ByteBuffer.allocate(bytes.remaining() * Integer.BYTES);
590+
byte[] hexString = new byte[8];
589591
while (bytes.hasRemaining()) {
590592
int val;
591593
byte b = bytes.get();
592594
if (b == (byte) '\\') {
593595
byte b1 = bytes.get();
594596
if (b1 == (byte) 'u') {
595-
// read 2 bytes as integer
596-
val = bytes.getShort();
597+
bytes.get(hexString, 0, 4);
598+
val = Integer.parseInt(new String(hexString, 0, 4), 16);
597599
} else if (b1 == (byte) 'U') {
598-
val = bytes.getInt();
600+
bytes.get(hexString, 0, 8);
601+
val = Integer.parseInt(new String(hexString, 0, 8), 16);
599602
} else {
600603
throw new CharacterCodingException();
601604
}
@@ -609,7 +612,7 @@ String decodeBytes(ByteBuffer bytes, String errors) {
609612
buf.flip();
610613
CharBuffer decoded = UTF32.newDecoder().onMalformedInput(errorAction).onUnmappableCharacter(errorAction).decode(buf);
611614
return String.valueOf(decoded);
612-
} catch (CharacterCodingException e) {
615+
} catch (CharacterCodingException | NumberFormatException | BufferUnderflowException e) {
613616
throw raise(UnicodeDecodeError, e);
614617
}
615618
}

0 commit comments

Comments
 (0)