Skip to content

Commit f74a590

Browse files
committed
add support for codecs surrogateescape decode and encode error handling
1 parent 74c04f8 commit f74a590

File tree

1 file changed

+46
-1
lines changed

1 file changed

+46
-1
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/CodecsModuleBuiltins.java

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ static void handle(TruffleEncoder encoder, String errorAction, Object inputObjec
143143
@Cached ConditionProfile strictProfile,
144144
@Cached ConditionProfile backslashreplaceProfile,
145145
@Cached ConditionProfile surrogatepassProfile,
146+
@Cached ConditionProfile surrogateescapeProfile,
146147
@Cached RaiseEncodingErrorNode raiseEncodingErrorNode,
147148
@Cached PRaiseNode raiseNode) {
148149
boolean fixed;
@@ -154,6 +155,8 @@ static void handle(TruffleEncoder encoder, String errorAction, Object inputObjec
154155
fixed = backslashreplace(encoder);
155156
} else if (surrogatepassProfile.profile(SURROGATEPASS.equals(errorAction))) {
156157
fixed = surrogatepass(encoder);
158+
} else if (surrogateescapeProfile.profile(SURROGATEESCAPE.equals(errorAction))) {
159+
fixed = surrogateescape(encoder);
157160
} else {
158161
throw raiseNode.raise(LookupError, ErrorMessages.UNKNOWN_ERROR_HANDLER, errorAction);
159162
}
@@ -213,6 +216,24 @@ private static boolean surrogatepass(TruffleEncoder encoder) {
213216
return false;
214217
}
215218

219+
@TruffleBoundary
220+
private static boolean surrogateescape(TruffleEncoder encoder) {
221+
String p = new String(encoder.getInputChars(encoder.getErrorLength()));
222+
byte[] replacement = new byte[p.length()];
223+
int outp = 0;
224+
for (int i = 0; i < p.length();) {
225+
int ch = p.codePointAt(i);
226+
if (!(0xDC80 <= ch && ch <= 0xDCFF)) {
227+
// Not a surrogate
228+
return false;
229+
}
230+
replacement[outp++] = (byte) (ch - 0xdc00);
231+
i += Character.charCount(ch);
232+
}
233+
encoder.replace(encoder.getErrorLength(), replacement, 0, outp);
234+
return true;
235+
}
236+
216237
/**
 * Factory method: returns the node produced by the generated
 * {@code CodecsModuleBuiltinsFactory.HandleEncodingErrorNodeGen.create()}.
 */
public static HandleEncodingErrorNode create() {
217238
return CodecsModuleBuiltinsFactory.HandleEncodingErrorNodeGen.create();
218239
}
@@ -250,6 +271,7 @@ static void doStrict(TruffleDecoder decoder, String errorAction, Object inputObj
250271
@Cached ConditionProfile strictProfile,
251272
@Cached ConditionProfile backslashreplaceProfile,
252273
@Cached ConditionProfile surrogatepassProfile,
274+
@Cached ConditionProfile surrogateescapeProfile,
253275
@Cached RaiseDecodingErrorNode raiseDecodingErrorNode,
254276
@Cached PRaiseNode raiseNode) {
255277
boolean fixed;
@@ -261,6 +283,8 @@ static void doStrict(TruffleDecoder decoder, String errorAction, Object inputObj
261283
fixed = backslashreplace(decoder);
262284
} else if (surrogatepassProfile.profile(SURROGATEPASS.equals(errorAction))) {
263285
fixed = surrogatepass(decoder);
286+
} else if (surrogateescapeProfile.profile(SURROGATEESCAPE.equals(errorAction))) {
287+
fixed = surrogateescape(decoder);
264288
} else {
265289
throw raiseNode.raise(LookupError, ErrorMessages.UNKNOWN_ERROR_HANDLER, errorAction);
266290
}
@@ -308,6 +332,27 @@ private static boolean surrogatepass(TruffleDecoder decoder) {
308332
return false;
309333
}
310334

335+
@TruffleBoundary
336+
private static boolean surrogateescape(TruffleDecoder decoder) {
337+
int errorLength = decoder.getErrorLength();
338+
// decode up to 4 bytes
339+
int consumed = 0;
340+
boolean replaced = false;
341+
byte[] inputBytes = decoder.getInputBytes(errorLength);
342+
while (consumed < 4 && consumed < errorLength) {
343+
int b = inputBytes[consumed] & 0xff;
344+
// Refuse to escape ASCII bytes.
345+
if (b < 128) {
346+
break;
347+
}
348+
int codePoint = 0xdc00 + b;
349+
decoder.replace(1, Character.toChars(codePoint));
350+
replaced = true;
351+
consumed += 1;
352+
}
353+
return replaced;
354+
}
355+
311356
/**
 * Factory method: returns the node produced by the generated
 * {@code CodecsModuleBuiltinsFactory.HandleDecodingErrorNodeGen.create()}.
 */
public static HandleDecodingErrorNode create() {
312357
return CodecsModuleBuiltinsFactory.HandleDecodingErrorNodeGen.create();
313358
}
@@ -323,14 +368,14 @@ protected static CodingErrorAction convertCodingErrorAction(String errors) {
323368
errorAction = CodingErrorAction.IGNORE;
324369
break;
325370
case REPLACE:
326-
case SURROGATEESCAPE:
327371
case NAMEREPLACE:
328372
case XMLCHARREFREPLACE:
329373
errorAction = CodingErrorAction.REPLACE;
330374
break;
331375
case STRICT:
332376
case BACKSLASHREPLACE:
333377
case SURROGATEPASS:
378+
case SURROGATEESCAPE:
334379
default:
335380
// Everything else will be handled by our Handle nodes
336381
errorAction = CodingErrorAction.REPORT;

0 commit comments

Comments
 (0)