Skip to content

Commit 1240452

Browse files
committed
Add support for the `xmlcharrefreplace` error handler in codecs encode and decode error handling
1 parent f74a590 commit 1240452

File tree

1 file changed

+70
-1
lines changed

1 file changed

+70
-1
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/CodecsModuleBuiltins.java

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ static void handle(TruffleEncoder encoder, String errorAction, Object inputObjec
144144
@Cached ConditionProfile backslashreplaceProfile,
145145
@Cached ConditionProfile surrogatepassProfile,
146146
@Cached ConditionProfile surrogateescapeProfile,
147+
@Cached ConditionProfile xmlcharrefreplaceProfile,
147148
@Cached RaiseEncodingErrorNode raiseEncodingErrorNode,
148149
@Cached PRaiseNode raiseNode) {
149150
boolean fixed;
@@ -157,6 +158,8 @@ static void handle(TruffleEncoder encoder, String errorAction, Object inputObjec
157158
fixed = surrogatepass(encoder);
158159
} else if (surrogateescapeProfile.profile(SURROGATEESCAPE.equals(errorAction))) {
159160
fixed = surrogateescape(encoder);
161+
} else if (xmlcharrefreplaceProfile.profile(XMLCHARREFREPLACE.equals(errorAction))) {
162+
fixed = xmlcharrefreplace(encoder);
160163
} else {
161164
throw raiseNode.raise(LookupError, ErrorMessages.UNKNOWN_ERROR_HANDLER, errorAction);
162165
}
@@ -234,6 +237,72 @@ private static boolean surrogateescape(TruffleEncoder encoder) {
234237
return true;
235238
}
236239

240+
@TruffleBoundary
241+
private static boolean xmlcharrefreplace(TruffleEncoder encoder) {
242+
String p = new String(encoder.getInputChars(encoder.getErrorLength()));
243+
int size = 0;
244+
for (int i = 0; i < encoder.getErrorLength(); ++i) {
245+
// object is guaranteed to be "ready"
246+
int ch = p.codePointAt(i);
247+
if (ch < 10) {
248+
size += 2 + 1 + 1;
249+
} else if (ch < 100) {
250+
size += 2 + 2 + 1;
251+
} else if (ch < 1000) {
252+
size += 2 + 3 + 1;
253+
} else if (ch < 10000) {
254+
size += 2 + 4 + 1;
255+
} else if (ch < 100000) {
256+
size += 2 + 5 + 1;
257+
} else if (ch < 1000000) {
258+
size += 2 + 6 + 1;
259+
} else {
260+
size += 2 + 7 + 1;
261+
}
262+
}
263+
264+
byte[] replacement = new byte[size];
265+
int consumed = 0;
266+
// generate replacement
267+
for (int i = 0; i < p.length(); ++i) {
268+
int digits;
269+
int base;
270+
int ch = p.codePointAt(i);
271+
replacement[consumed++] = '&';
272+
replacement[consumed++] = '#';
273+
if (ch < 10) {
274+
digits = 1;
275+
base = 1;
276+
} else if (ch < 100) {
277+
digits = 2;
278+
base = 10;
279+
} else if (ch < 1000) {
280+
digits = 3;
281+
base = 100;
282+
} else if (ch < 10000) {
283+
digits = 4;
284+
base = 1000;
285+
} else if (ch < 100000) {
286+
digits = 5;
287+
base = 10000;
288+
} else if (ch < 1000000) {
289+
digits = 6;
290+
base = 100000;
291+
} else {
292+
digits = 7;
293+
base = 1000000;
294+
}
295+
while (digits-- > 0) {
296+
replacement[consumed++] = (byte) ('0' + ch / base);
297+
ch %= base;
298+
base /= 10;
299+
}
300+
replacement[consumed++] = ';';
301+
}
302+
encoder.replace(encoder.getErrorLength(), replacement, 0, consumed);
303+
return true;
304+
}
305+
237306
/**
 * Factory method for {@code HandleEncodingErrorNode}; delegates to
 * {@code CodecsModuleBuiltinsFactory.HandleEncodingErrorNodeGen.create()}.
 *
 * @return the node instance returned by that delegate
 */
public static HandleEncodingErrorNode create() {
238307
return CodecsModuleBuiltinsFactory.HandleEncodingErrorNodeGen.create();
239308
}
@@ -369,13 +438,13 @@ protected static CodingErrorAction convertCodingErrorAction(String errors) {
369438
break;
370439
case REPLACE:
371440
case NAMEREPLACE:
372-
case XMLCHARREFREPLACE:
373441
errorAction = CodingErrorAction.REPLACE;
374442
break;
375443
case STRICT:
376444
case BACKSLASHREPLACE:
377445
case SURROGATEPASS:
378446
case SURROGATEESCAPE:
447+
case XMLCHARREFREPLACE:
379448
default:
380449
// Everything else will be handled by our Handle nodes
381450
errorAction = CodingErrorAction.REPORT;

0 commit comments

Comments
 (0)