@@ -143,6 +143,8 @@ static void handle(TruffleEncoder encoder, String errorAction, Object inputObjec
143
143
@ Cached ConditionProfile strictProfile ,
144
144
@ Cached ConditionProfile backslashreplaceProfile ,
145
145
@ Cached ConditionProfile surrogatepassProfile ,
146
+ @ Cached ConditionProfile surrogateescapeProfile ,
147
+ @ Cached ConditionProfile xmlcharrefreplaceProfile ,
146
148
@ Cached RaiseEncodingErrorNode raiseEncodingErrorNode ,
147
149
@ Cached PRaiseNode raiseNode ) {
148
150
boolean fixed ;
@@ -154,6 +156,10 @@ static void handle(TruffleEncoder encoder, String errorAction, Object inputObjec
154
156
fixed = backslashreplace (encoder );
155
157
} else if (surrogatepassProfile .profile (SURROGATEPASS .equals (errorAction ))) {
156
158
fixed = surrogatepass (encoder );
159
+ } else if (surrogateescapeProfile .profile (SURROGATEESCAPE .equals (errorAction ))) {
160
+ fixed = surrogateescape (encoder );
161
+ } else if (xmlcharrefreplaceProfile .profile (XMLCHARREFREPLACE .equals (errorAction ))) {
162
+ fixed = xmlcharrefreplace (encoder );
157
163
} else {
158
164
throw raiseNode .raise (LookupError , ErrorMessages .UNKNOWN_ERROR_HANDLER , errorAction );
159
165
}
@@ -213,6 +219,90 @@ private static boolean surrogatepass(TruffleEncoder encoder) {
213
219
return false ;
214
220
}
215
221
222
+ @ TruffleBoundary
223
+ private static boolean surrogateescape (TruffleEncoder encoder ) {
224
+ String p = new String (encoder .getInputChars (encoder .getErrorLength ()));
225
+ byte [] replacement = new byte [p .length ()];
226
+ int outp = 0 ;
227
+ for (int i = 0 ; i < p .length ();) {
228
+ int ch = p .codePointAt (i );
229
+ if (!(0xDC80 <= ch && ch <= 0xDCFF )) {
230
+ // Not a surrogate
231
+ return false ;
232
+ }
233
+ replacement [outp ++] = (byte ) (ch - 0xdc00 );
234
+ i += Character .charCount (ch );
235
+ }
236
+ encoder .replace (encoder .getErrorLength (), replacement , 0 , outp );
237
+ return true ;
238
+ }
239
+
240
+ @ TruffleBoundary
241
+ private static boolean xmlcharrefreplace (TruffleEncoder encoder ) {
242
+ String p = new String (encoder .getInputChars (encoder .getErrorLength ()));
243
+ int size = 0 ;
244
+ for (int i = 0 ; i < encoder .getErrorLength (); ++i ) {
245
+ // object is guaranteed to be "ready"
246
+ int ch = p .codePointAt (i );
247
+ if (ch < 10 ) {
248
+ size += 2 + 1 + 1 ;
249
+ } else if (ch < 100 ) {
250
+ size += 2 + 2 + 1 ;
251
+ } else if (ch < 1000 ) {
252
+ size += 2 + 3 + 1 ;
253
+ } else if (ch < 10000 ) {
254
+ size += 2 + 4 + 1 ;
255
+ } else if (ch < 100000 ) {
256
+ size += 2 + 5 + 1 ;
257
+ } else if (ch < 1000000 ) {
258
+ size += 2 + 6 + 1 ;
259
+ } else {
260
+ size += 2 + 7 + 1 ;
261
+ }
262
+ }
263
+
264
+ byte [] replacement = new byte [size ];
265
+ int consumed = 0 ;
266
+ // generate replacement
267
+ for (int i = 0 ; i < p .length (); ++i ) {
268
+ int digits ;
269
+ int base ;
270
+ int ch = p .codePointAt (i );
271
+ replacement [consumed ++] = '&' ;
272
+ replacement [consumed ++] = '#' ;
273
+ if (ch < 10 ) {
274
+ digits = 1 ;
275
+ base = 1 ;
276
+ } else if (ch < 100 ) {
277
+ digits = 2 ;
278
+ base = 10 ;
279
+ } else if (ch < 1000 ) {
280
+ digits = 3 ;
281
+ base = 100 ;
282
+ } else if (ch < 10000 ) {
283
+ digits = 4 ;
284
+ base = 1000 ;
285
+ } else if (ch < 100000 ) {
286
+ digits = 5 ;
287
+ base = 10000 ;
288
+ } else if (ch < 1000000 ) {
289
+ digits = 6 ;
290
+ base = 100000 ;
291
+ } else {
292
+ digits = 7 ;
293
+ base = 1000000 ;
294
+ }
295
+ while (digits -- > 0 ) {
296
+ replacement [consumed ++] = (byte ) ('0' + ch / base );
297
+ ch %= base ;
298
+ base /= 10 ;
299
+ }
300
+ replacement [consumed ++] = ';' ;
301
+ }
302
+ encoder .replace (encoder .getErrorLength (), replacement , 0 , consumed );
303
+ return true ;
304
+ }
305
+
216
306
/**
 * Factory method for this node.
 *
 * @return the result of {@code CodecsModuleBuiltinsFactory.HandleEncodingErrorNodeGen.create()}
 */
public static HandleEncodingErrorNode create () {
217
307
return CodecsModuleBuiltinsFactory .HandleEncodingErrorNodeGen .create ();
218
308
}
@@ -250,6 +340,7 @@ static void doStrict(TruffleDecoder decoder, String errorAction, Object inputObj
250
340
@ Cached ConditionProfile strictProfile ,
251
341
@ Cached ConditionProfile backslashreplaceProfile ,
252
342
@ Cached ConditionProfile surrogatepassProfile ,
343
+ @ Cached ConditionProfile surrogateescapeProfile ,
253
344
@ Cached RaiseDecodingErrorNode raiseDecodingErrorNode ,
254
345
@ Cached PRaiseNode raiseNode ) {
255
346
boolean fixed ;
@@ -261,6 +352,8 @@ static void doStrict(TruffleDecoder decoder, String errorAction, Object inputObj
261
352
fixed = backslashreplace (decoder );
262
353
} else if (surrogatepassProfile .profile (SURROGATEPASS .equals (errorAction ))) {
263
354
fixed = surrogatepass (decoder );
355
+ } else if (surrogateescapeProfile .profile (SURROGATEESCAPE .equals (errorAction ))) {
356
+ fixed = surrogateescape (decoder );
264
357
} else {
265
358
throw raiseNode .raise (LookupError , ErrorMessages .UNKNOWN_ERROR_HANDLER , errorAction );
266
359
}
@@ -308,6 +401,27 @@ private static boolean surrogatepass(TruffleDecoder decoder) {
308
401
return false ;
309
402
}
310
403
404
+ @ TruffleBoundary
405
+ private static boolean surrogateescape (TruffleDecoder decoder ) {
406
+ int errorLength = decoder .getErrorLength ();
407
+ // decode up to 4 bytes
408
+ int consumed = 0 ;
409
+ boolean replaced = false ;
410
+ byte [] inputBytes = decoder .getInputBytes (errorLength );
411
+ while (consumed < 4 && consumed < errorLength ) {
412
+ int b = inputBytes [consumed ] & 0xff ;
413
+ // Refuse to escape ASCII bytes.
414
+ if (b < 128 ) {
415
+ break ;
416
+ }
417
+ int codePoint = 0xdc00 + b ;
418
+ decoder .replace (1 , Character .toChars (codePoint ));
419
+ replaced = true ;
420
+ consumed += 1 ;
421
+ }
422
+ return replaced ;
423
+ }
424
+
311
425
/**
 * Factory method for this node.
 *
 * @return the result of {@code CodecsModuleBuiltinsFactory.HandleDecodingErrorNodeGen.create()}
 */
public static HandleDecodingErrorNode create () {
312
426
return CodecsModuleBuiltinsFactory .HandleDecodingErrorNodeGen .create ();
313
427
}
@@ -323,14 +437,14 @@ protected static CodingErrorAction convertCodingErrorAction(String errors) {
323
437
errorAction = CodingErrorAction .IGNORE ;
324
438
break ;
325
439
case REPLACE :
326
- case SURROGATEESCAPE :
327
440
case NAMEREPLACE :
328
- case XMLCHARREFREPLACE :
329
441
errorAction = CodingErrorAction .REPLACE ;
330
442
break ;
331
443
case STRICT :
332
444
case BACKSLASHREPLACE :
333
445
case SURROGATEPASS :
446
+ case SURROGATEESCAPE :
447
+ case XMLCHARREFREPLACE :
334
448
default :
335
449
// Everything else will be handled by our Handle nodes
336
450
errorAction = CodingErrorAction .REPORT ;
0 commit comments