Skip to content

Commit 557bf31

Browse files
committed
Handle unpaired surrogates in unicode-escape codec
1 parent 33c7217 commit 557bf31

File tree

2 files changed

+8
-9
lines changed

2 files changed

+8
-9
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/charset/PythonRawUnicodeEscapeCharsetEncoder.java

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -40,13 +40,14 @@
4040
*/
4141
package com.oracle.graal.python.charset;
4242

43-
import com.oracle.graal.python.util.PythonUtils;
4443
import java.nio.ByteBuffer;
4544
import java.nio.CharBuffer;
4645
import java.nio.charset.Charset;
4746
import java.nio.charset.CharsetEncoder;
4847
import java.nio.charset.CoderResult;
4948

49+
import com.oracle.graal.python.util.PythonUtils;
50+
5051
public class PythonRawUnicodeEscapeCharsetEncoder extends CharsetEncoder {
5152
protected PythonRawUnicodeEscapeCharsetEncoder(Charset cs) {
5253
super(cs, 2, 10, new byte[]{(byte) '?'});
@@ -73,10 +74,9 @@ protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target) {
7374
if (Character.isLowSurrogate(low)) {
7475
codePoint = Character.toCodePoint(ch, low);
7576
} else {
76-
// Unpaired surrogate, this shouldn't happen in any sanely constructed Java
77-
// String
78-
source.position(source.position() - 2);
79-
return CoderResult.malformedForLength(2);
77+
// Unpaired surrogate - emit the high surrogate as is and process the low in the
78+
// next iteration
79+
source.position(source.position() - 1);
8080
}
8181
}
8282
if (codePoint <= 0xFF) {

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/charset/PythonUnicodeEscapeCharsetEncoder.java

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -75,10 +75,9 @@ protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target) {
7575
if (Character.isLowSurrogate(low)) {
7676
codePoint = Character.toCodePoint(ch, low);
7777
} else {
78-
// Unpaired surrogate, this shouldn't happen in any sanely constructed Java
79-
// String
80-
source.position(source.position() - 2);
81-
return CoderResult.malformedForLength(2);
78+
// Unpaired surrogate - emit the high surrogate as is and process the low in the
79+
// next iteration
80+
source.position(source.position() - 1);
8281
}
8382
}
8483
int len = BytesUtils.unicodeEscape(codePoint, 0, tmpBuf);

0 commit comments

Comments
 (0)