@@ -144,6 +144,7 @@ static void handle(TruffleEncoder encoder, String errorAction, Object inputObjec
144
144
@ Cached ConditionProfile backslashreplaceProfile ,
145
145
@ Cached ConditionProfile surrogatepassProfile ,
146
146
@ Cached ConditionProfile surrogateescapeProfile ,
147
+ @ Cached ConditionProfile xmlcharrefreplaceProfile ,
147
148
@ Cached RaiseEncodingErrorNode raiseEncodingErrorNode ,
148
149
@ Cached PRaiseNode raiseNode ) {
149
150
boolean fixed ;
@@ -157,6 +158,8 @@ static void handle(TruffleEncoder encoder, String errorAction, Object inputObjec
157
158
fixed = surrogatepass (encoder );
158
159
} else if (surrogateescapeProfile .profile (SURROGATEESCAPE .equals (errorAction ))) {
159
160
fixed = surrogateescape (encoder );
161
+ } else if (xmlcharrefreplaceProfile .profile (XMLCHARREFREPLACE .equals (errorAction ))) {
162
+ fixed = xmlcharrefreplace (encoder );
160
163
} else {
161
164
throw raiseNode .raise (LookupError , ErrorMessages .UNKNOWN_ERROR_HANDLER , errorAction );
162
165
}
@@ -234,6 +237,72 @@ private static boolean surrogateescape(TruffleEncoder encoder) {
234
237
return true ;
235
238
}
236
239
240
+ @ TruffleBoundary
241
+ private static boolean xmlcharrefreplace (TruffleEncoder encoder ) {
242
+ String p = new String (encoder .getInputChars (encoder .getErrorLength ()));
243
+ int size = 0 ;
244
+ for (int i = 0 ; i < encoder .getErrorLength (); ++i ) {
245
+ // object is guaranteed to be "ready"
246
+ int ch = p .codePointAt (i );
247
+ if (ch < 10 ) {
248
+ size += 2 + 1 + 1 ;
249
+ } else if (ch < 100 ) {
250
+ size += 2 + 2 + 1 ;
251
+ } else if (ch < 1000 ) {
252
+ size += 2 + 3 + 1 ;
253
+ } else if (ch < 10000 ) {
254
+ size += 2 + 4 + 1 ;
255
+ } else if (ch < 100000 ) {
256
+ size += 2 + 5 + 1 ;
257
+ } else if (ch < 1000000 ) {
258
+ size += 2 + 6 + 1 ;
259
+ } else {
260
+ size += 2 + 7 + 1 ;
261
+ }
262
+ }
263
+
264
+ byte [] replacement = new byte [size ];
265
+ int consumed = 0 ;
266
+ // generate replacement
267
+ for (int i = 0 ; i < p .length (); ++i ) {
268
+ int digits ;
269
+ int base ;
270
+ int ch = p .codePointAt (i );
271
+ replacement [consumed ++] = '&' ;
272
+ replacement [consumed ++] = '#' ;
273
+ if (ch < 10 ) {
274
+ digits = 1 ;
275
+ base = 1 ;
276
+ } else if (ch < 100 ) {
277
+ digits = 2 ;
278
+ base = 10 ;
279
+ } else if (ch < 1000 ) {
280
+ digits = 3 ;
281
+ base = 100 ;
282
+ } else if (ch < 10000 ) {
283
+ digits = 4 ;
284
+ base = 1000 ;
285
+ } else if (ch < 100000 ) {
286
+ digits = 5 ;
287
+ base = 10000 ;
288
+ } else if (ch < 1000000 ) {
289
+ digits = 6 ;
290
+ base = 100000 ;
291
+ } else {
292
+ digits = 7 ;
293
+ base = 1000000 ;
294
+ }
295
+ while (digits -- > 0 ) {
296
+ replacement [consumed ++] = (byte ) ('0' + ch / base );
297
+ ch %= base ;
298
+ base /= 10 ;
299
+ }
300
+ replacement [consumed ++] = ';' ;
301
+ }
302
+ encoder .replace (encoder .getErrorLength (), replacement , 0 , consumed );
303
+ return true ;
304
+ }
305
+
237
306
public static HandleEncodingErrorNode create () {
238
307
return CodecsModuleBuiltinsFactory .HandleEncodingErrorNodeGen .create ();
239
308
}
@@ -369,13 +438,13 @@ protected static CodingErrorAction convertCodingErrorAction(String errors) {
369
438
break ;
370
439
case REPLACE :
371
440
case NAMEREPLACE :
372
- case XMLCHARREFREPLACE :
373
441
errorAction = CodingErrorAction .REPLACE ;
374
442
break ;
375
443
case STRICT :
376
444
case BACKSLASHREPLACE :
377
445
case SURROGATEPASS :
378
446
case SURROGATEESCAPE :
447
+ case XMLCHARREFREPLACE :
379
448
default :
380
449
// Everything else will be handled by our Handle nodes
381
450
errorAction = CodingErrorAction .REPORT ;
0 commit comments