@@ -352,15 +352,18 @@ private PTuple encodeString(String self, String errors) {
352
352
int n = encoded .remaining ();
353
353
ByteBuffer buf = ByteBuffer .allocate (n );
354
354
assert n % Integer .BYTES == 0 ;
355
+
355
356
while (encoded .hasRemaining ()) {
356
- byte [] b = new byte [4 ];
357
- encoded .get (b );
358
-
359
- boolean write = false ;
360
- for (int i = 0 ; i < b .length ; i ++) {
361
- if (write || b [i ] != 0 ) {
362
- buf .put (b [i ]);
363
- write = true ;
357
+ int int1 = encoded .getInt ();
358
+ if (int1 > 31 && int1 <= 126 ) {
359
+ buf .put ((byte ) int1 );
360
+ } else {
361
+ buf .put ((byte ) '\\' );
362
+ buf .put ((byte ) 'u' );
363
+ String hexString = Integer .toHexString (int1 );
364
+ for (int i = 0 ; i < hexString .length (); i ++) {
365
+ assert hexString .charAt (i ) < 128 ;
366
+ buf .put ((byte ) hexString .charAt (i ));
364
367
}
365
368
}
366
369
}
@@ -379,7 +382,7 @@ private PTuple encodeString(String self, String errors) {
379
382
// _codecs.decode(obj, encoding='utf-8', errors='strict')
380
383
@ Builtin (name = "__truffle_decode" , fixedNumOfPositionalArgs = 1 , keywordArguments = {"encoding" , "errors" })
381
384
@ GenerateNodeFactory
382
- abstract static class CodecsDecodeNode extends PythonBuiltinNode {
385
+ abstract static class CodecsDecodeNode extends EncodeBaseNode {
383
386
@ Child private SequenceStorageNodes .ToByteArrayNode toByteArrayNode ;
384
387
385
388
@ Specialization
@@ -430,25 +433,7 @@ private ByteBuffer getBytesBuffer(PIBytesLike bytesLike) {
430
433
431
434
@ TruffleBoundary
432
435
String decodeBytes (ByteBuffer bytes , String encoding , String errors ) {
433
- CodingErrorAction errorAction ;
434
- switch (errors ) {
435
- // TODO: see [GR-10256] to implement the correct handling mechanics
436
- case "ignore" :
437
- case "surrogatepass" :
438
- errorAction = CodingErrorAction .IGNORE ;
439
- break ;
440
- case "replace" :
441
- case "surrogateescape" :
442
- case "namereplace" :
443
- case "backslashreplace" :
444
- case "xmlcharrefreplace" :
445
- errorAction = CodingErrorAction .REPLACE ;
446
- break ;
447
- default :
448
- errorAction = CodingErrorAction .REPORT ;
449
- break ;
450
- }
451
-
436
+ CodingErrorAction errorAction = convertCodingErrorAction (errors );
452
437
try {
453
438
Charset charset = getCharset (encoding );
454
439
CharBuffer decoded = charset .newDecoder ().onMalformedInput (errorAction ).onUnmappableCharacter (errorAction ).decode (bytes );
@@ -461,6 +446,73 @@ String decodeBytes(ByteBuffer bytes, String encoding, String errors) {
461
446
}
462
447
}
463
448
449
+ @ Builtin (name = "__truffle_raw_decode" , fixedNumOfPositionalArgs = 1 , keywordArguments = {"errors" })
450
+ @ GenerateNodeFactory
451
+ abstract static class RawDecodeNode extends EncodeBaseNode {
452
+ @ Child private SequenceStorageNodes .ToByteArrayNode toByteArrayNode ;
453
+
454
+ @ Specialization
455
+ Object decode (PIBytesLike bytes , @ SuppressWarnings ("unused" ) PNone errors ) {
456
+ String string = decodeBytes (getBytesBuffer (bytes ), "strict" );
457
+ return factory ().createTuple (new Object []{string , string .length ()});
458
+ }
459
+
460
+ @ Specialization (guards = {"isString(errors)" })
461
+ Object decode (PIBytesLike bytes , Object errors ,
462
+ @ Cached ("createClassProfile()" ) ValueProfile errorsTypeProfile ) {
463
+ Object profiledErrors = errorsTypeProfile .profile (errors );
464
+ String string = decodeBytes (getBytesBuffer (bytes ), profiledErrors .toString ());
465
+ return factory ().createTuple (new Object []{string , string .length ()});
466
+ }
467
+
468
+ // @Fallback
469
+ // Object decode(Object bytes, @SuppressWarnings("unused") Object encoding,
470
+ // @SuppressWarnings("unused") Object errors) {
471
+ // throw raise(TypeError, "a bytes-like object is required, not '%p'", bytes);
472
+ // }
473
+
474
+ private ByteBuffer getBytesBuffer (PIBytesLike bytesLike ) {
475
+ if (toByteArrayNode == null ) {
476
+ CompilerDirectives .transferToInterpreterAndInvalidate ();
477
+ toByteArrayNode = insert (SequenceStorageNodes .ToByteArrayNode .create (false ));
478
+ }
479
+ byte [] barr = toByteArrayNode .execute (bytesLike .getSequenceStorage ());
480
+ return ByteBuffer .wrap (barr , 0 , barr .length );
481
+ }
482
+
483
+ @ TruffleBoundary
484
+ String decodeBytes (ByteBuffer bytes , String errors ) {
485
+ CodingErrorAction errorAction = convertCodingErrorAction (errors );
486
+ try {
487
+ ByteBuffer buf = ByteBuffer .allocate (bytes .remaining () * Integer .BYTES );
488
+ while (bytes .hasRemaining ()) {
489
+ int val ;
490
+ byte b = bytes .get ();
491
+ if (b == (byte ) '\\' ) {
492
+ byte b1 = bytes .get ();
493
+ if (b1 == (byte ) 'u' ) {
494
+ // read 2 bytes as integer
495
+ val = bytes .getShort ();
496
+ } else if (b1 == (byte ) 'U' ) {
497
+ val = bytes .getInt ();
498
+ } else {
499
+ throw new CharacterCodingException ();
500
+ }
501
+ } else {
502
+ val = b ;
503
+ }
504
+ buf .putInt (val );
505
+ }
506
+ buf .flip ();
507
+ Charset charset = getCharset ("utf-32" );
508
+ CharBuffer decoded = charset .newDecoder ().onMalformedInput (errorAction ).onUnmappableCharacter (errorAction ).decode (buf );
509
+ return String .valueOf (decoded );
510
+ } catch (CharacterCodingException e ) {
511
+ throw raise (UnicodeDecodeError , "%s" , e .getMessage ());
512
+ }
513
+ }
514
+ }
515
+
464
516
// _codecs.lookup(name)
465
517
@ Builtin (name = "__truffle_lookup" , fixedNumOfPositionalArgs = 1 )
466
518
@ GenerateNodeFactory
0 commit comments