Skip to content

Commit 1e17ff7

Browse files
committed
Implement 'codecs.raw_unicode_escape_decode'.
1 parent ced2182 commit 1e17ff7

File tree

2 files changed

+81
-29
lines changed

2 files changed

+81
-29
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/CodecsModuleBuiltins.java

Lines changed: 80 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -352,15 +352,18 @@ private PTuple encodeString(String self, String errors) {
352352
int n = encoded.remaining();
353353
ByteBuffer buf = ByteBuffer.allocate(n);
354354
assert n % Integer.BYTES == 0;
355+
355356
while (encoded.hasRemaining()) {
356-
byte[] b = new byte[4];
357-
encoded.get(b);
358-
359-
boolean write = false;
360-
for (int i = 0; i < b.length; i++) {
361-
if (write || b[i] != 0) {
362-
buf.put(b[i]);
363-
write = true;
357+
int int1 = encoded.getInt();
358+
if (int1 > 31 && int1 <= 126) {
359+
buf.put((byte) int1);
360+
} else {
361+
buf.put((byte) '\\');
362+
buf.put((byte) 'u');
363+
String hexString = Integer.toHexString(int1);
364+
for (int i = 0; i < hexString.length(); i++) {
365+
assert hexString.charAt(i) < 128;
366+
buf.put((byte) hexString.charAt(i));
364367
}
365368
}
366369
}
@@ -379,7 +382,7 @@ private PTuple encodeString(String self, String errors) {
379382
// _codecs.decode(obj, encoding='utf-8', errors='strict')
380383
@Builtin(name = "__truffle_decode", fixedNumOfPositionalArgs = 1, keywordArguments = {"encoding", "errors"})
381384
@GenerateNodeFactory
382-
abstract static class CodecsDecodeNode extends PythonBuiltinNode {
385+
abstract static class CodecsDecodeNode extends EncodeBaseNode {
383386
@Child private SequenceStorageNodes.ToByteArrayNode toByteArrayNode;
384387

385388
@Specialization
@@ -430,25 +433,7 @@ private ByteBuffer getBytesBuffer(PIBytesLike bytesLike) {
430433

431434
@TruffleBoundary
432435
String decodeBytes(ByteBuffer bytes, String encoding, String errors) {
433-
CodingErrorAction errorAction;
434-
switch (errors) {
435-
// TODO: see [GR-10256] to implement the correct handling mechanics
436-
case "ignore":
437-
case "surrogatepass":
438-
errorAction = CodingErrorAction.IGNORE;
439-
break;
440-
case "replace":
441-
case "surrogateescape":
442-
case "namereplace":
443-
case "backslashreplace":
444-
case "xmlcharrefreplace":
445-
errorAction = CodingErrorAction.REPLACE;
446-
break;
447-
default:
448-
errorAction = CodingErrorAction.REPORT;
449-
break;
450-
}
451-
436+
CodingErrorAction errorAction = convertCodingErrorAction(errors);
452437
try {
453438
Charset charset = getCharset(encoding);
454439
CharBuffer decoded = charset.newDecoder().onMalformedInput(errorAction).onUnmappableCharacter(errorAction).decode(bytes);
@@ -461,6 +446,73 @@ String decodeBytes(ByteBuffer bytes, String encoding, String errors) {
461446
}
462447
}
463448

449+
@Builtin(name = "__truffle_raw_decode", fixedNumOfPositionalArgs = 1, keywordArguments = {"errors"})
450+
@GenerateNodeFactory
451+
abstract static class RawDecodeNode extends EncodeBaseNode {
452+
@Child private SequenceStorageNodes.ToByteArrayNode toByteArrayNode;
453+
454+
@Specialization
455+
Object decode(PIBytesLike bytes, @SuppressWarnings("unused") PNone errors) {
456+
String string = decodeBytes(getBytesBuffer(bytes), "strict");
457+
return factory().createTuple(new Object[]{string, string.length()});
458+
}
459+
460+
@Specialization(guards = {"isString(errors)"})
461+
Object decode(PIBytesLike bytes, Object errors,
462+
@Cached("createClassProfile()") ValueProfile errorsTypeProfile) {
463+
Object profiledErrors = errorsTypeProfile.profile(errors);
464+
String string = decodeBytes(getBytesBuffer(bytes), profiledErrors.toString());
465+
return factory().createTuple(new Object[]{string, string.length()});
466+
}
467+
468+
// @Fallback
469+
// Object decode(Object bytes, @SuppressWarnings("unused") Object encoding,
470+
// @SuppressWarnings("unused") Object errors) {
471+
// throw raise(TypeError, "a bytes-like object is required, not '%p'", bytes);
472+
// }
473+
474+
private ByteBuffer getBytesBuffer(PIBytesLike bytesLike) {
475+
if (toByteArrayNode == null) {
476+
CompilerDirectives.transferToInterpreterAndInvalidate();
477+
toByteArrayNode = insert(SequenceStorageNodes.ToByteArrayNode.create(false));
478+
}
479+
byte[] barr = toByteArrayNode.execute(bytesLike.getSequenceStorage());
480+
return ByteBuffer.wrap(barr, 0, barr.length);
481+
}
482+
483+
@TruffleBoundary
484+
String decodeBytes(ByteBuffer bytes, String errors) {
485+
CodingErrorAction errorAction = convertCodingErrorAction(errors);
486+
try {
487+
ByteBuffer buf = ByteBuffer.allocate(bytes.remaining() * Integer.BYTES);
488+
while (bytes.hasRemaining()) {
489+
int val;
490+
byte b = bytes.get();
491+
if (b == (byte) '\\') {
492+
byte b1 = bytes.get();
493+
if (b1 == (byte) 'u') {
494+
// read 2 bytes as integer
495+
val = bytes.getShort();
496+
} else if (b1 == (byte) 'U') {
497+
val = bytes.getInt();
498+
} else {
499+
throw new CharacterCodingException();
500+
}
501+
} else {
502+
val = b;
503+
}
504+
buf.putInt(val);
505+
}
506+
buf.flip();
507+
Charset charset = getCharset("utf-32");
508+
CharBuffer decoded = charset.newDecoder().onMalformedInput(errorAction).onUnmappableCharacter(errorAction).decode(buf);
509+
return String.valueOf(decoded);
510+
} catch (CharacterCodingException e) {
511+
throw raise(UnicodeDecodeError, "%s", e.getMessage());
512+
}
513+
}
514+
}
515+
464516
// _codecs.lookup(name)
465517
@Builtin(name = "__truffle_lookup", fixedNumOfPositionalArgs = 1)
466518
@GenerateNodeFactory

graalpython/lib-graalpython/_codecs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ def raw_unicode_escape_encode(string, errors=None):
272272

273273
@__builtin__
274274
def raw_unicode_escape_decode(string, errors=None):
275-
raise NotImplementedError("raw_unicode_escape_decode")
275+
return __truffle_raw_decode(string, errors)
276276

277277

278278
@__builtin__

0 commit comments

Comments
 (0)