59
59
import com .oracle .graal .python .builtins .CoreFunctions ;
60
60
import com .oracle .graal .python .builtins .PythonBuiltins ;
61
61
import com .oracle .graal .python .builtins .objects .PNone ;
62
+ import com .oracle .graal .python .builtins .objects .bytes .BytesUtils ;
62
63
import com .oracle .graal .python .builtins .objects .bytes .PBytes ;
63
64
import com .oracle .graal .python .builtins .objects .bytes .PBytesLike ;
64
65
import com .oracle .graal .python .builtins .objects .common .HashingStorage ;
95
96
96
97
@ CoreFunctions (defineModule = "_codecs" )
97
98
public class CodecsModuleBuiltins extends PythonBuiltins {
99
+
100
// Names of the codec error handlers. The switch statements in this class compare the
// incoming error-action string against these constants instead of repeating the literals.
+ public static final String STRICT = "strict" ;
101
+ public static final String IGNORE = "ignore" ;
102
+ public static final String REPLACE = "replace" ;
103
+ public static final String BACKSLASHREPLACE = "backslashreplace" ;
104
+ public static final String NAMEREPLACE = "namereplace" ;
105
+ public static final String XMLCHARREFREPLACE = "xmlcharrefreplace" ;
106
+ public static final String SURROGATEESCAPE = "surrogateescape" ;
107
+ public static final String SURROGATEPASS = "surrogatepass" ;
108
+
98
109
@ Override
99
110
protected List <? extends NodeFactory <? extends PythonBuiltinBaseNode >> getNodeFactories () {
100
111
return CodecsModuleBuiltinsFactory .getFactories ();
@@ -134,9 +145,14 @@ void handle(TruffleEncoder encoder, String errorAction, Object inputObject,
134
145
@ Cached PRaiseNode raiseNode ) {
135
146
// Ignore and replace are handled by Java Charset
136
147
switch (actionProfile .profile (errorAction )) {
137
- case "strict" :
148
+ case STRICT :
149
+ break ;
150
+ case BACKSLASHREPLACE :
151
+ if (backslashreplace (encoder )) {
152
+ return ;
153
+ }
138
154
break ;
139
- case "surrogatepass" :
155
+ case SURROGATEPASS :
140
156
if (surrogatepass (encoder )) {
141
157
return ;
142
158
}
@@ -147,6 +163,30 @@ void handle(TruffleEncoder encoder, String errorAction, Object inputObject,
147
163
throw raiseEncodingErrorNode .execute (encoder , inputObject );
148
164
}
149
165
166
+ @ TruffleBoundary
167
+ private static boolean backslashreplace (TruffleEncoder encoder ) {
168
+ String p = new String (encoder .getInputChars (encoder .getErrorLenght ()));
169
+ StringBuilder sb = new StringBuilder ();
170
+ byte [] buf = new byte [10 ];
171
+ for (int i = 0 ; i < p .length ();) {
172
+ int ch = p .codePointAt (i );
173
+ int len ;
174
+ if (ch < 0x100 ) {
175
+ BytesUtils .byteEscape (ch , 0 , buf );
176
+ len = 4 ;
177
+ } else {
178
+ len = BytesUtils .unicodeNonAsciiEscape (ch , 0 , buf );
179
+ }
180
+ for (int j = 0 ; j < len ; j ++) {
181
+ sb .append ((char ) buf [j ]);
182
+ }
183
+ i += Character .charCount (ch );
184
+ }
185
+ encoder .replace (sb .toString (), p .length ());
186
+ return true ;
187
+ }
188
+
189
+ @ TruffleBoundary
150
190
private static boolean surrogatepass (TruffleEncoder encoder ) {
151
191
// UTF-8 only for now. The name should be normalized already
152
192
if (encoder .getEncodingName ().equals ("utf_8" )) {
@@ -209,9 +249,14 @@ void doStrict(TruffleDecoder decoder, String errorAction, Object inputObject,
209
249
@ Cached PRaiseNode raiseNode ) {
210
250
// Ignore and replace are handled by Java Charset
211
251
switch (actionProfile .profile (errorAction )) {
212
- case "strict" :
252
+ case STRICT :
213
253
break ;
214
- case "surrogatepass" :
254
+ case BACKSLASHREPLACE :
255
+ if (backslashreplace (decoder )) {
256
+ return ;
257
+ }
258
+ break ;
259
+ case SURROGATEPASS :
215
260
if (surrogatepass (decoder )) {
216
261
return ;
217
262
}
@@ -222,6 +267,24 @@ void doStrict(TruffleDecoder decoder, String errorAction, Object inputObject,
222
267
throw raiseDecodingErrorNode .execute (decoder , inputObject );
223
268
}
224
269
270
+ @ TruffleBoundary
271
+ private static boolean backslashreplace (TruffleDecoder decoder ) {
272
+ byte [] p = decoder .getInputBytes (decoder .getErrorLenght ());
273
+ char [] replacement = new char [p .length * 4 ];
274
+ int outp = 0 ;
275
+ byte [] buf = new byte [4 ];
276
+ for (int i = 0 ; i < p .length ; i ++) {
277
+ BytesUtils .byteEscape (p [i ], 0 , buf );
278
+ replacement [outp ++] = (char ) buf [0 ];
279
+ replacement [outp ++] = (char ) buf [1 ];
280
+ replacement [outp ++] = (char ) buf [2 ];
281
+ replacement [outp ++] = (char ) buf [3 ];
282
+ }
283
+ decoder .replace (replacement , p .length );
284
+ return true ;
285
+ }
286
+
287
+ @ TruffleBoundary
225
288
private static boolean surrogatepass (TruffleDecoder decoder ) {
226
289
// UTF-8 only for now. The name should be normalized already
227
290
if (decoder .getEncodingName ().equals ("utf_8" )) {
@@ -250,16 +313,18 @@ protected static CodingErrorAction convertCodingErrorAction(String errors) {
250
313
CodingErrorAction errorAction ;
251
314
switch (errors ) {
252
315
// TODO: see [GR-10256] to implement the correct handling mechanics
253
- case "ignore" :
316
+ case IGNORE :
254
317
errorAction = CodingErrorAction .IGNORE ;
255
318
break ;
256
- case "replace" :
257
- case "surrogateescape" :
258
- case "namereplace" :
259
- case "backslashreplace" :
260
- case "xmlcharrefreplace" :
319
+ case REPLACE :
320
+ case SURROGATEESCAPE :
321
+ case NAMEREPLACE :
322
+ case XMLCHARREFREPLACE :
261
323
errorAction = CodingErrorAction .REPLACE ;
262
324
break ;
325
+ case STRICT :
326
+ case BACKSLASHREPLACE :
327
+ case SURROGATEPASS :
263
328
default :
264
329
// Everything else will be handled by our Handle nodes
265
330
errorAction = CodingErrorAction .REPORT ;
@@ -280,7 +345,7 @@ public abstract static class CodecsEncodeNode extends EncodeBaseNode {
280
345
Object encode (Object str , @ SuppressWarnings ("unused" ) PNone encoding , @ SuppressWarnings ("unused" ) PNone errors ,
281
346
@ Shared ("castStr" ) @ Cached CastToJavaStringNode castStr ) {
282
347
String profiledStr = cast (castStr , str );
283
- PBytes bytes = encodeString (str , profiledStr , "utf-8" , "strict" );
348
+ PBytes bytes = encodeString (str , profiledStr , "utf-8" , STRICT );
284
349
return factory ().createTuple (new Object []{bytes , getLength (bytes )});
285
350
}
286
351
@@ -290,7 +355,7 @@ Object encode(Object str, Object encoding, @SuppressWarnings("unused") PNone err
290
355
@ Shared ("castEncoding" ) @ Cached CastToJavaStringNode castEncoding ) {
291
356
String profiledStr = cast (castStr , str );
292
357
String profiledEncoding = cast (castEncoding , encoding );
293
- PBytes bytes = encodeString (str , profiledStr , profiledEncoding , "strict" );
358
+ PBytes bytes = encodeString (str , profiledStr , profiledEncoding , STRICT );
294
359
return factory ().createTuple (new Object []{bytes , getLength (bytes )});
295
360
}
296
361
@@ -371,12 +436,12 @@ abstract static class CodecsDecodeNode extends EncodeBaseNode {
371
436
372
437
// Specialization for calls where both encoding and errors are PNone: the bytes are decoded
// as "utf-8" with the strict error handler; finalData is converted with castToBoolean.
@ Specialization
373
438
Object decode (VirtualFrame frame , PBytesLike bytes , @ SuppressWarnings ("unused" ) PNone encoding , @ SuppressWarnings ("unused" ) PNone errors , Object finalData ) {
374
- return decodeBytes (bytes , "utf-8" , "strict" , castToBoolean (frame , finalData ));
439
+ return decodeBytes (bytes , "utf-8" , STRICT , castToBoolean (frame , finalData ));
375
440
}
376
441
377
442
// Specialization guarded by isString(encoding) for calls where errors is PNone: the encoding
// is converted with castToString and the bytes are decoded with the strict error handler.
@ Specialization (guards = {"isString(encoding)" })
378
443
Object decode (VirtualFrame frame , PBytesLike bytes , Object encoding , @ SuppressWarnings ("unused" ) PNone errors , Object finalData ) {
379
- return decodeBytes (bytes , castToString (encoding ), "strict" , castToBoolean (frame , finalData ));
444
+ return decodeBytes (bytes , castToString (encoding ), STRICT , castToBoolean (frame , finalData ));
380
445
}
381
446
382
447
@ Specialization (guards = {"isString(errors)" })
@@ -498,7 +563,7 @@ private static int codePointAt(String chars, int pos) {
498
563
static class TruffleEncoder {
499
564
private final String encodingName ;
500
565
private final CharsetEncoder encoder ;
501
- private final CharBuffer inputBuffer ;
566
+ private CharBuffer inputBuffer ;
502
567
private ByteBuffer outputBuffer ;
503
568
private CoderResult coderResult ;
504
569
@@ -589,6 +654,16 @@ public void replace(byte[] replacement, int skipInput) {
589
654
inputBuffer .position (inputBuffer .position () + skipInput );
590
655
}
591
656
657
+ @ TruffleBoundary
658
+ public void replace (String replacement , int skipInput ) {
659
+ inputBuffer .position (inputBuffer .position () + skipInput );
660
+ CharBuffer newBuffer = CharBuffer .allocate (inputBuffer .remaining () + replacement .length ());
661
+ newBuffer .put (replacement );
662
+ newBuffer .put (inputBuffer );
663
+ newBuffer .flip ();
664
+ inputBuffer = newBuffer ;
665
+ }
666
+
592
667
public String getEncodingName () {
593
668
return encodingName ;
594
669
}
0 commit comments