Skip to content

Commit 1c3ac6e

Browse files
committed
Fix decoding non-bytes
1 parent 2fb24ed commit 1c3ac6e

File tree

2 files changed

+56
-62
lines changed

2 files changed

+56
-62
lines changed

graalpython/com.oracle.graal.python.test/src/tests/test_codecs.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,17 @@ def test_import():
3131
def test_decode():
3232
import codecs
3333

34-
# TODO: this does not work yet due to the fact that we do not handle all strings literal types yet
35-
# assert codecs.decode(b'\xe4\xf6\xfc', 'latin-1') == '\xe4\xf6\xfc'
36-
# assert_raises(TypeError, codecs.decode)
34+
assert codecs.decode(b'\xe4\xf6\xfc', 'latin-1') == '\xe4\xf6\xfc'
35+
assert_raises(TypeError, codecs.decode)
3736
assert codecs.decode(b'abc') == 'abc'
38-
# assert_raises(UnicodeDecodeError, codecs.decode, b'\xff', 'ascii')
37+
assert_raises(UnicodeDecodeError, codecs.decode, b'\xff', 'ascii')
3938

4039
# test keywords
41-
# assert codecs.decode(obj=b'\xe4\xf6\xfc', encoding='latin-1') == '\xe4\xf6\xfc'
42-
# assert codecs.decode(b'[\xff]', 'ascii', errors='ignore') == '[]'
40+
assert codecs.decode(obj=b'\xe4\xf6\xfc', encoding='latin-1') == '\xe4\xf6\xfc'
41+
assert codecs.decode(b'[\xff]', 'ascii', errors='ignore') == '[]'
4342
assert codecs.decode(b'[]', 'ascii') == '[]'
43+
assert codecs.decode(memoryview(b'[]'), 'ascii') == '[]'
44+
assert_raises(TypeError, codecs.decode, 'asdf', 'ascii')
4445

4546
data0 = b'\xc5'
4647
data1 = b'\x91'
@@ -52,16 +53,15 @@ def test_decode():
5253
def test_encode():
5354
import codecs
5455

55-
# TODO: this does not work yet due to the fact that we do not handle all strings literal types yet
56-
# assert codecs.encode('\xe4\xf6\xfc', 'latin-1') == b'\xe4\xf6\xfc'
57-
# assert_raises(TypeError, codecs.encode)
56+
assert codecs.encode('\xe4\xf6\xfc', 'latin-1') == b'\xe4\xf6\xfc'
57+
assert_raises(TypeError, codecs.encode)
5858
assert_raises(LookupError, codecs.encode, "foo", "__spam__")
59-
# assert codecs.encode('abc') == b'abc'
60-
# assert_raises(UnicodeEncodeError, codecs.encode, '\xffff', 'ascii')
59+
assert codecs.encode('abc') == b'abc'
60+
assert_raises(UnicodeEncodeError, codecs.encode, '\xffff', 'ascii')
6161

6262
# test keywords
63-
# assert codecs.encode(obj='\xe4\xf6\xfc', encoding='latin-1') == b'\xe4\xf6\xfc'
64-
# assert codecs.encode('[\xff]', 'ascii', errors='ignore') == b'[]'
63+
assert codecs.encode(obj='\xe4\xf6\xfc', encoding='latin-1') == b'\xe4\xf6\xfc'
64+
assert codecs.encode('[\xff]', 'ascii', errors='ignore') == b'[]'
6565
assert codecs.encode('[]', 'ascii') == b'[]'
6666

6767

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/CodecsModuleBuiltins.java

Lines changed: 43 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
import static com.oracle.graal.python.builtins.PythonBuiltinClassType.NotImplementedError;
4444
import static com.oracle.graal.python.builtins.objects.bytes.BytesUtils.HEXDIGITS;
4545
import static com.oracle.graal.python.builtins.objects.bytes.BytesUtils.digitValue;
46+
import static com.oracle.graal.python.nodes.BuiltinNames.ENCODE;
4647
import static com.oracle.graal.python.nodes.BuiltinNames._CODECS;
4748
import static com.oracle.graal.python.nodes.ErrorMessages.ARG_MUST_BE_CALLABLE;
4849
import static com.oracle.graal.python.nodes.ErrorMessages.BYTESLIKE_OBJ_REQUIRED;
@@ -53,6 +54,7 @@
5354
import static com.oracle.graal.python.nodes.ErrorMessages.S_MUST_RETURN_TUPLE;
5455
import static com.oracle.graal.python.nodes.ErrorMessages.UNKNOWN_ENCODING;
5556
import static com.oracle.graal.python.nodes.ErrorMessages.UNKNOWN_ERROR_HANDLER;
57+
import static com.oracle.graal.python.nodes.SpecialMethodNames.DECODE;
5658
import static com.oracle.graal.python.runtime.exception.PythonErrorType.LookupError;
5759
import static com.oracle.graal.python.runtime.exception.PythonErrorType.MemoryError;
5860
import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError;
@@ -76,6 +78,7 @@
7678
import com.oracle.graal.python.builtins.PythonBuiltins;
7779
import com.oracle.graal.python.builtins.objects.PNone;
7880
import com.oracle.graal.python.builtins.objects.buffer.PythonBufferAccessLibrary;
81+
import com.oracle.graal.python.builtins.objects.buffer.PythonBufferAcquireLibrary;
7982
import com.oracle.graal.python.builtins.objects.bytes.ByteArrayBuffer;
8083
import com.oracle.graal.python.builtins.objects.bytes.BytesUtils;
8184
import com.oracle.graal.python.builtins.objects.bytes.PBytes;
@@ -91,12 +94,11 @@
9194
import com.oracle.graal.python.lib.PyCallableCheckNode;
9295
import com.oracle.graal.python.lib.PyObjectSizeNode;
9396
import com.oracle.graal.python.lib.PyObjectTypeCheck;
94-
import static com.oracle.graal.python.nodes.BuiltinNames.ENCODE;
9597
import com.oracle.graal.python.nodes.ErrorMessages;
9698
import com.oracle.graal.python.nodes.PNodeWithContext;
9799
import com.oracle.graal.python.nodes.PNodeWithRaise;
100+
import com.oracle.graal.python.nodes.PNodeWithRaiseAndIndirectCall;
98101
import com.oracle.graal.python.nodes.PRaiseNode;
99-
import static com.oracle.graal.python.nodes.SpecialMethodNames.DECODE;
100102
import com.oracle.graal.python.nodes.call.CallNode;
101103
import com.oracle.graal.python.nodes.call.special.CallBinaryMethodNode;
102104
import com.oracle.graal.python.nodes.call.special.CallUnaryMethodNode;
@@ -108,7 +110,6 @@
108110
import com.oracle.graal.python.nodes.function.builtins.PythonQuaternaryClinicBuiltinNode;
109111
import com.oracle.graal.python.nodes.function.builtins.PythonTernaryBuiltinNode;
110112
import com.oracle.graal.python.nodes.function.builtins.PythonTernaryClinicBuiltinNode;
111-
import com.oracle.graal.python.nodes.function.builtins.PythonUnaryBuiltinNode;
112113
import com.oracle.graal.python.nodes.function.builtins.PythonUnaryClinicBuiltinNode;
113114
import com.oracle.graal.python.nodes.function.builtins.clinic.ArgumentClinicProvider;
114115
import com.oracle.graal.python.nodes.util.CastToJavaStringNode;
@@ -557,49 +558,50 @@ protected ArgumentClinicProvider getArgumentClinic() {
557558
}
558559

559560
@Specialization
560-
Object decode(PBytesLike input, Object encoding, Object errors, Object finalData,
561+
Object decode(VirtualFrame frame, Object input, String encoding, String errors, boolean finalData,
561562
@Cached InternalCodecsDecodeNode internalNode) {
562-
return internalNode.execute(input, encoding, errors, finalData);
563+
return internalNode.execute(frame, this, input, encoding, errors, finalData);
563564
}
564565
}
565566

566567
@GenerateUncached
567568
public abstract static class InternalCodecsDecodeNode extends PNodeWithContext {
568-
abstract Object execute(Object input, Object encoding, Object errors, Object finalData);
569+
abstract Object execute(Frame frame, PNodeWithRaiseAndIndirectCall node, Object input, String encoding, String errors, boolean finalData);
569570

570-
public final Object call(Object input, Object encoding, Object errors, Object finalData) {
571-
return execute(input, encoding, errors, finalData);
571+
public final Object call(VirtualFrame frame, PNodeWithRaiseAndIndirectCall node, Object input, String encoding, String errors, boolean finalData) {
572+
return execute(frame, node, input, encoding, errors, finalData);
572573
}
573574

574-
@Specialization
575-
Object decode(PBytesLike input, String encoding, String errors, boolean finalData,
576-
@Cached GetInternalByteArrayNode getBytes,
575+
@Specialization(limit = "3")
576+
Object decode(VirtualFrame frame, PNodeWithRaiseAndIndirectCall node, Object input, String encoding, String errors, boolean finalData,
577+
@CachedLibrary("input") PythonBufferAcquireLibrary acquireLib,
578+
@CachedLibrary(limit = "1") PythonBufferAccessLibrary bufferLib,
577579
@Cached HandleDecodingErrorNode errorHandler,
578580
@Cached PRaiseNode raiseNode,
579581
@Cached PythonObjectFactory factory) {
580-
byte[] bytes = getBytes.execute(input.getSequenceStorage());
581-
CodingErrorAction errorAction = convertCodingErrorAction(errors);
582-
Charset charset = CharsetMapping.getCharset(encoding);
583-
if (charset == null) {
584-
throw raiseNode.raise(LookupError, ErrorMessages.UNKNOWN_ENCODING, encoding);
585-
}
586-
TruffleDecoder decoder;
582+
Object buffer = acquireLib.acquireReadonly(input, frame, node);
587583
try {
588-
decoder = new TruffleDecoder(CharsetMapping.normalize(encoding), charset, bytes, bytes.length, errorAction);
589-
while (!decoder.decodingStep(finalData)) {
590-
errorHandler.execute(decoder, errors, input);
584+
int len = bufferLib.getBufferLength(buffer);
585+
byte[] bytes = bufferLib.getInternalOrCopiedByteArray(buffer);
586+
CodingErrorAction errorAction = convertCodingErrorAction(errors);
587+
Charset charset = CharsetMapping.getCharset(encoding);
588+
if (charset == null) {
589+
throw raiseNode.raise(LookupError, ErrorMessages.UNKNOWN_ENCODING, encoding);
591590
}
592-
} catch (OutOfMemoryError e) {
593-
CompilerDirectives.transferToInterpreterAndInvalidate();
594-
throw raiseNode.raise(MemoryError);
591+
TruffleDecoder decoder;
592+
try {
593+
decoder = new TruffleDecoder(CharsetMapping.normalize(encoding), charset, bytes, len, errorAction);
594+
while (!decoder.decodingStep(finalData)) {
595+
errorHandler.execute(decoder, errors, input);
596+
}
597+
} catch (OutOfMemoryError e) {
598+
CompilerDirectives.transferToInterpreterAndInvalidate();
599+
throw raiseNode.raise(MemoryError);
600+
}
601+
return factory.createTuple(new Object[]{decoder.getString(), decoder.getInputPosition()});
602+
} finally {
603+
bufferLib.release(buffer);
595604
}
596-
return factory.createTuple(new Object[]{decoder.getString(), decoder.getInputPosition()});
597-
}
598-
599-
@Fallback
600-
Object decode(Object bytes, @SuppressWarnings("unused") Object encoding, @SuppressWarnings("unused") Object errors, @SuppressWarnings("unused") Object finalData,
601-
@Cached PRaiseNode raiseNode) {
602-
throw raiseNode.raise(TypeError, BYTESLIKE_OBJ_REQUIRED, bytes);
603605
}
604606
}
605607

@@ -842,33 +844,25 @@ private static boolean hasTruffleEncoding(String encoding) {
842844
return CharsetMapping.getCharset(encoding) != null;
843845
}
844846

845-
@Builtin(name = "lookup", minNumOfPositionalArgs = 1)
847+
@Builtin(name = "lookup", minNumOfPositionalArgs = 1, parameterNames = {"encoding"})
848+
@ArgumentClinic(name = "encoding", conversion = ArgumentClinic.ClinicConversion.String)
846849
@GenerateNodeFactory
847-
abstract static class LookupNode extends PythonUnaryBuiltinNode {
850+
abstract static class LookupNode extends PythonUnaryClinicBuiltinNode {
848851
@Specialization
849-
PTuple lookup(VirtualFrame frame, Object encoding,
852+
PTuple lookup(VirtualFrame frame, String encoding,
850853
@Cached InternalLookupNode internalNode) {
851854
return internalNode.execute(frame, encoding);
852855
}
856+
857+
@Override
858+
protected ArgumentClinicProvider getArgumentClinic() {
859+
return CodecsModuleBuiltinsClinicProviders.LookupNodeClinicProviderGen.INSTANCE;
860+
}
853861
}
854862

855863
@GenerateUncached
856864
abstract static class InternalLookupNode extends PNodeWithContext {
857-
abstract PTuple execute(Frame frame, Object encoding);
858-
859-
@Specialization
860-
PTuple lookup(VirtualFrame frame, PBytesLike encoding,
861-
@Cached InternalCodecsDecodeNode decodeNode,
862-
@Cached PyObjectTypeCheck typeCheck,
863-
@Cached CallUnaryMethodNode callNode,
864-
@Cached PyObjectSizeNode sizeNode,
865-
@Cached ConditionProfile hasSearchPathProfile,
866-
@Cached ConditionProfile hasTruffleEncodingProfile,
867-
@Cached ConditionProfile isTupleProfile,
868-
@Cached PRaiseNode raiseNode) {
869-
String decoded = (String) ((PTuple) decodeNode.execute(encoding, "ascii", PNone.NO_VALUE, true)).getSequenceStorage().getInternalArray()[0];
870-
return lookup(frame, decoded, callNode, typeCheck, sizeNode, hasSearchPathProfile, hasTruffleEncodingProfile, isTupleProfile, raiseNode);
871-
}
865+
abstract PTuple execute(Frame frame, String encoding);
872866

873867
@Specialization
874868
PTuple lookup(VirtualFrame frame, String encoding,

0 commit comments

Comments
 (0)