From 7f1f197b286a6e1b2a87dcd1cb0322069e1e07c9 Mon Sep 17 00:00:00 2001
From: Evgeny Vashkevich
Date: Wed, 16 Apr 2025 17:18:34 +0200
Subject: [PATCH] [mypyc] Add primitive for bytes decode() method

---
Review notes (ignored by git-am):
  * CPy_DecodeUtf8 now takes the encoding as a Python str object, matching
    what the method_op passes; it previously declared `const char *errors`.
  * The primitive is registered for (bytes, str) so it can match
    b.decode("utf-8"); the irbuild expectation names the new C function.
  * bytearray and other encodings fall back to PyUnicode_FromEncodedObject.
  * The `index` lines below are carried over from the original patch and no
    longer correspond to the post-image blobs.

 mypyc/lib-rt/CPy.h                 |  1 +
 mypyc/lib-rt/bytes_ops.c           | 22 ++++++++++++++++++++++
 mypyc/primitives/bytes_ops.py      | 12 ++++++++++++
 mypyc/test-data/irbuild-bytes.test | 12 ++++++++++++
 mypyc/test-data/run-bytes.test     | 30 ++++++++++++++++++++++++++++++
 5 files changed, 77 insertions(+)

diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h
index 1f0cf4dd63d6..6e0938c651dd 100644
--- a/mypyc/lib-rt/CPy.h
+++ b/mypyc/lib-rt/CPy.h
@@ -764,6 +764,7 @@ CPyTagged CPyBytes_GetItem(PyObject *o, CPyTagged index);
 PyObject *CPyBytes_Concat(PyObject *a, PyObject *b);
 PyObject *CPyBytes_Join(PyObject *sep, PyObject *iter);
 CPyTagged CPyBytes_Ord(PyObject *obj);
+PyObject *CPy_DecodeUtf8(PyObject *bytes_obj, PyObject *encoding);
 
 
 int CPyBytes_Compare(PyObject *left, PyObject *right);
diff --git a/mypyc/lib-rt/bytes_ops.c b/mypyc/lib-rt/bytes_ops.c
index 6ff34b021a9a..ff4a3cc7886b 100644
--- a/mypyc/lib-rt/bytes_ops.c
+++ b/mypyc/lib-rt/bytes_ops.c
@@ -162,3 +162,25 @@ CPyTagged CPyBytes_Ord(PyObject *obj) {
     PyErr_SetString(PyExc_TypeError, "ord() expects a character");
     return CPY_INT_TAG;
 }
+
+
+// Implementation of bytes.decode(encoding). UTF-8 input held in a bytes
+// object is decoded directly; every other combination (a different encoding,
+// or a bytes-like object such as bytearray) is delegated to
+// PyUnicode_FromEncodedObject, which raises TypeError for unsupported objects.
+PyObject *CPy_DecodeUtf8(PyObject *bytes_obj, PyObject *encoding) {
+    const char *enc = PyUnicode_AsUTF8(encoding);
+    if (enc == NULL) {
+        return NULL;
+    }
+
+    if (PyBytes_Check(bytes_obj)
+            && (strcmp(enc, "utf-8") == 0 || strcmp(enc, "utf8") == 0)) {
+        char *data = PyBytes_AS_STRING(bytes_obj);
+        Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj);
+        // NULL selects the default "strict" error handler.
+        return PyUnicode_DecodeUTF8(data, size, NULL);
+    }
+
+    return PyUnicode_FromEncodedObject(bytes_obj, enc, NULL);
+}
diff --git a/mypyc/primitives/bytes_ops.py b/mypyc/primitives/bytes_ops.py
index 1afd196cff84..10f952afd16e 100644
--- a/mypyc/primitives/bytes_ops.py
+++ b/mypyc/primitives/bytes_ops.py
@@ -107,3 +107,15 @@
     c_function_name="CPyBytes_Ord",
     error_kind=ERR_MAGIC,
 )
+
+# bytes.decode(encoding)
+# NOTE(review): priority is raised on the assumption that a more general
+# decode primitive also matches (bytes, str) -- confirm before merging.
+method_op(
+    name="decode",
+    arg_types=[bytes_rprimitive, str_rprimitive],
+    return_type=str_rprimitive,
+    c_function_name="CPy_DecodeUtf8",
+    error_kind=ERR_MAGIC,
+    priority=2,
+)
diff --git a/mypyc/test-data/irbuild-bytes.test b/mypyc/test-data/irbuild-bytes.test
index 476c5ac59f48..81da031bcaee 100644
--- a/mypyc/test-data/irbuild-bytes.test
+++ b/mypyc/test-data/irbuild-bytes.test
@@ -185,3 +185,15 @@ L0:
     r10 = CPyBytes_Build(2, var, r9)
     b4 = r10
     return 1
+
+[case testDecodeUtf8]
+def f(b: bytes) -> str:
+    return b.decode("utf-8")
+[out]
+def f(b):
+    b :: bytes
+    r0, r1 :: str
+L0:
+    r0 = 'utf-8'
+    r1 = CPy_DecodeUtf8(b, r0)
+    return r1
diff --git a/mypyc/test-data/run-bytes.test b/mypyc/test-data/run-bytes.test
index fa63c46a6798..5442919bdb90 100644
--- a/mypyc/test-data/run-bytes.test
+++ b/mypyc/test-data/run-bytes.test
@@ -323,3 +323,33 @@ class A:
 def test_bytes_dunder() -> None:
     assert b'%b' % A() == b'aaa'
     assert b'%s' % A() == b'aaa'
+
+[case testDecodeUtf8]
+from typing import Any
+from testutil import assertRaises
+from a import bytes_subclass
+
+def test_decode_utf8() -> None:
+    assert b'hello'.decode('utf-8') == 'hello'
+    assert b''.decode('utf-8') == ''
+    assert b'\xc3\xa4'.decode('utf8') == '\xe4'
+    assert b'\xe4'.decode('latin1') == '\xe4'
+    with assertRaises(UnicodeDecodeError):
+        b'\xff'.decode('utf-8')
+
+    x: bytes = bytearray(b'hello')
+    assert x.decode('utf-8') == 'hello'
+    assert type(x.decode('utf-8')) == str
+
+    y: Any = bytes_subclass()
+    assert y.decode('utf-8') == 'spook'
+
+    n: Any = 123
+    with assertRaises(AttributeError):
+        n.decode('utf-8')
+
+
+[file a.py]
+class bytes_subclass(bytes):
+    def decode(self, encoding='utf-8'):
+        return 'spook'