diff --git a/mypyc/irbuild/specialize.py b/mypyc/irbuild/specialize.py
index 84807a7fdb53..8cba045d1315 100644
--- a/mypyc/irbuild/specialize.py
+++ b/mypyc/irbuild/specialize.py
@@ -70,6 +70,7 @@
     is_int64_rprimitive,
     is_int_rprimitive,
     is_list_rprimitive,
+    is_str_rprimitive,
     is_uint8_rprimitive,
     list_rprimitive,
     object_rprimitive,
@@ -100,7 +101,12 @@
 )
 from mypyc.primitives.float_ops import isinstance_float
 from mypyc.primitives.generic_ops import generic_setattr
-from mypyc.primitives.int_ops import isinstance_int
+from mypyc.primitives.int_ops import (
+    int_to_big_endian_op,
+    int_to_bytes_op,
+    int_to_little_endian_op,
+    isinstance_int,
+)
 from mypyc.primitives.list_ops import isinstance_list, new_list_set_item_op
 from mypyc.primitives.misc_ops import isinstance_bool
 from mypyc.primitives.set_ops import isinstance_frozenset, isinstance_set
@@ -1136,3 +1142,39 @@ def translate_object_setattr(builder: IRBuilder, expr: CallExpr, callee: RefExpr
     name_reg = builder.accept(attr_name)
 
     return builder.call_c(generic_setattr, [self_reg, name_reg, value], expr.line)
+
+
+@specialize_function("to_bytes", int_rprimitive)
+def specialize_int_to_bytes(builder: IRBuilder, expr: CallExpr, callee: RefExpr) -> Value | None:
+    """Specialize int.to_bytes(length, byteorder, signed=False).
+
+    Literal "little"/"big" byte orders map to dedicated primitives; any other
+    str byteorder uses the generic primitive. Returns None (no specialization)
+    if the call shape or the static argument types are not supported.
+    """
+    if not isinstance(callee, MemberExpr) or len(expr.args) not in (2, 3):
+        return None
+    length_expr = expr.args[0]
+    byteorder_expr = expr.args[1]
+    signed_expr = expr.args[2] if len(expr.args) == 3 else None
+    # Check static types before building any operand: node_type() expects an
+    # Expression (not a Value), and bailing out after accept() would leave the
+    # already emitted argument evaluation behind for the fallback call to repeat.
+    if not (
+        is_int_rprimitive(builder.node_type(length_expr))
+        and is_str_rprimitive(builder.node_type(byteorder_expr))
+        and (signed_expr is None or is_bool_rprimitive(builder.node_type(signed_expr)))
+    ):
+        return None
+    # Evaluate operands in source order: self, length, byteorder, signed.
+    args = [builder.accept(callee.expr), builder.accept(length_expr)]
+    if isinstance(byteorder_expr, StrExpr) and byteorder_expr.value == "little":
+        op = int_to_little_endian_op
+    elif isinstance(byteorder_expr, StrExpr) and byteorder_expr.value == "big":
+        op = int_to_big_endian_op
+    else:
+        # Fallback to generic primitive op; byteorder is validated at runtime
+        op = int_to_bytes_op
+        args.append(builder.accept(byteorder_expr))
+    args.append(builder.false() if signed_expr is None else builder.accept(signed_expr))
+    return builder.call_c(op, args, expr.line)
diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h
index e9dfd8de3683..6f661fc4a87a 100644
--- a/mypyc/lib-rt/CPy.h
+++ b/mypyc/lib-rt/CPy.h
@@ -148,6 +148,9 @@ CPyTagged CPyTagged_Remainder_(CPyTagged left, CPyTagged right);
 CPyTagged CPyTagged_BitwiseLongOp_(CPyTagged a, CPyTagged b, char op);
 CPyTagged CPyTagged_Rshift_(CPyTagged left, CPyTagged right);
 CPyTagged CPyTagged_Lshift_(CPyTagged left, CPyTagged right);
+PyObject *CPyTagged_ToBytes(CPyTagged self, Py_ssize_t length, PyObject *byteorder, int signed_flag);
+PyObject *CPyTagged_ToBigEndianBytes(CPyTagged self, Py_ssize_t length, int signed_flag);
+PyObject *CPyTagged_ToLittleEndianBytes(CPyTagged self, Py_ssize_t length, int signed_flag);
 
 PyObject *CPyTagged_Str(CPyTagged n);
 CPyTagged CPyTagged_FromFloat(double f);
diff --git a/mypyc/lib-rt/int_ops.c b/mypyc/lib-rt/int_ops.c
index e2c302eea576..5abebb9d1048 100644
--- a/mypyc/lib-rt/int_ops.c
+++ b/mypyc/lib-rt/int_ops.c
@@ -581,3 +581,69 @@ double CPyTagged_TrueDivide(CPyTagged x, CPyTagged y) {
     }
     return 1.0;
 }
+
+static PyObject *CPyLong_ToBytes(PyObject *v, Py_ssize_t length, int little_endian, int signed_flag) {
+    // This is a wrapper for PyLong_AsByteArray and PyBytes_FromStringAndSize
+    if (length < 0) {
+        // Match int.to_bytes; PyBytes_FromStringAndSize would raise SystemError instead
+        PyErr_SetString(PyExc_ValueError, "length argument must be non-negative");
+        return NULL;
+    }
+    PyObject *result = PyBytes_FromStringAndSize(NULL, length);
+    if (!result) {
+        return NULL;
+    }
+    unsigned char *bytes = (unsigned char *)PyBytes_AS_STRING(result);
+#if PY_VERSION_HEX >= 0x030D0000  // 3.13.0
+    int res = _PyLong_AsByteArray((PyLongObject *)v, bytes, length, little_endian, signed_flag, 1);
+#else
+    int res = _PyLong_AsByteArray((PyLongObject *)v, bytes, length, little_endian, signed_flag);
+#endif
+    if (res < 0) {
+        Py_DECREF(result);
+        return NULL;
+    }
+    return result;
+}
+
+// int.to_bytes(length, byteorder, signed=False)
+PyObject *CPyTagged_ToBytes(CPyTagged self, Py_ssize_t length, PyObject *byteorder, int signed_flag) {
+    // Validate byteorder before boxing self so that no error path has to release it
+    if (!PyUnicode_Check(byteorder)) {
+        PyErr_SetString(PyExc_TypeError, "byteorder must be str");
+        return NULL;
+    }
+    const char *order = PyUnicode_AsUTF8(byteorder);
+    if (!order) {
+        return NULL;
+    }
+    int little_endian;
+    if (strcmp(order, "big") == 0) {
+        little_endian = 0;
+    } else if (strcmp(order, "little") == 0) {
+        little_endian = 1;
+    } else {
+        PyErr_SetString(PyExc_ValueError, "byteorder must be either 'little' or 'big'");
+        return NULL;
+    }
+    PyObject *pyint = CPyTagged_AsObject(self);
+    PyObject *result = CPyLong_ToBytes(pyint, length, little_endian, signed_flag);
+    Py_DECREF(pyint);
+    return result;
+}
+
+// int.to_bytes(length, byteorder="little", signed=False)
+PyObject *CPyTagged_ToLittleEndianBytes(CPyTagged self, Py_ssize_t length, int signed_flag) {
+    PyObject *pyint = CPyTagged_AsObject(self);
+    PyObject *result = CPyLong_ToBytes(pyint, length, 1, signed_flag);
+    Py_DECREF(pyint);
+    return result;
+}
+
+// int.to_bytes(length, byteorder="big", signed=False)
+PyObject *CPyTagged_ToBigEndianBytes(CPyTagged self, Py_ssize_t length, int signed_flag) {
+    PyObject *pyint = CPyTagged_AsObject(self);
+    PyObject *result = CPyLong_ToBytes(pyint, length, 0, signed_flag);
+    Py_DECREF(pyint);
+    return result;
+}
diff --git a/mypyc/primitives/int_ops.py b/mypyc/primitives/int_ops.py
index d723c9b63a86..2af53075d3a6 100644
--- a/mypyc/primitives/int_ops.py
+++ b/mypyc/primitives/int_ops.py
@@ -21,6 +21,7 @@
     RType,
     bit_rprimitive,
     bool_rprimitive,
+    bytes_rprimitive,
     c_pyssize_t_rprimitive,
     float_rprimitive,
     int16_rprimitive,
@@ -305,3 +306,31 @@ def int_unary_op(name: str, c_function_name: str) -> PrimitiveDescription:
     c_function_name="PyLong_Check",
     error_kind=ERR_NEVER,
 )
+
+# specialized custom_op cases for int.to_bytes
+
+# int.to_bytes(length, "big")
+# int.to_bytes(length, "big", signed=...)
+int_to_big_endian_op = custom_op(
+    arg_types=[int_rprimitive, c_pyssize_t_rprimitive, bool_rprimitive],
+    return_type=bytes_rprimitive,
+    c_function_name="CPyTagged_ToBigEndianBytes",
+    error_kind=ERR_MAGIC,
+)
+
+# int.to_bytes(length, "little")
+# int.to_bytes(length, "little", signed=...)
+int_to_little_endian_op = custom_op(
+    arg_types=[int_rprimitive, c_pyssize_t_rprimitive, bool_rprimitive],
+    return_type=bytes_rprimitive,
+    c_function_name="CPyTagged_ToLittleEndianBytes",
+    error_kind=ERR_MAGIC,
+)
+
+# int.to_bytes(length, byteorder, signed=...)
+int_to_bytes_op = custom_op(
+    arg_types=[int_rprimitive, c_pyssize_t_rprimitive, str_rprimitive, bool_rprimitive],
+    return_type=bytes_rprimitive,
+    c_function_name="CPyTagged_ToBytes",
+    error_kind=ERR_MAGIC,
+)
diff --git a/mypyc/test-data/fixtures/ir.py b/mypyc/test-data/fixtures/ir.py
index 22a6a5986cbd..bc4738dc3a99 100644
--- a/mypyc/test-data/fixtures/ir.py
+++ b/mypyc/test-data/fixtures/ir.py
@@ -85,6 +85,7 @@ def __lt__(self, n: int) -> bool: pass
     def __gt__(self, n: int) -> bool: pass
     def __le__(self, n: int) -> bool: pass
     def __ge__(self, n: int) -> bool: pass
+    def to_bytes(self, length: int, order: str, *, signed: bool = False) -> bytes: pass
 
 class str:
     @overload
diff --git a/mypyc/test-data/irbuild-int.test b/mypyc/test-data/irbuild-int.test
index bdf9127b722a..82d6d85907c6 100644
--- a/mypyc/test-data/irbuild-int.test
+++ b/mypyc/test-data/irbuild-int.test
@@ -210,3 +210,32 @@ L0:
     r0 = CPyTagged_Invert(n)
     x = r0
     return x
+
+[case testIntToBytes]
+def f(x: int) -> bytes:
+    return x.to_bytes(2, "big")
+def g(x: int) -> bytes:
+    return x.to_bytes(4, "little", signed=True)
+def h(x: int, byteorder: str) -> bytes:
+    return x.to_bytes(8, byteorder)
+
+[out]
+def f(x):
+    x :: int
+    r0 :: bytes
+L0:
+    r0 = CPyTagged_ToBigEndianBytes(x, 2, 0)
+    return r0
+def g(x):
+    x :: int
+    r0 :: bytes
+L0:
+    r0 = CPyTagged_ToLittleEndianBytes(x, 4, 1)
+    return r0
+def h(x, byteorder):
+    x :: int
+    byteorder :: str
+    r0 :: bytes
+L0:
+    r0 = CPyTagged_ToBytes(x, 8, byteorder, 0)
+    return r0
diff --git a/mypyc/test-data/run-integers.test b/mypyc/test-data/run-integers.test
index 1163c9d942f7..576ae38d54eb 100644
--- a/mypyc/test-data/run-integers.test
+++ b/mypyc/test-data/run-integers.test
@@ -572,3 +572,30 @@ class subc(int):
 [file userdefinedint.py]
 class int:
     pass
+
+[case testIntToBytes]
+from testutil import assertRaises
+def to_bytes(n: int, length: int, byteorder: str, signed: bool = False) -> bytes:
+    return n.to_bytes(length, byteorder, signed=signed)
+def test_to_bytes() -> None:
+    assert to_bytes(255, 2, "big") == b'\x00\xff', to_bytes(255, 2, "big")
+    assert to_bytes(255, 2, "little") == b'\xff\x00', to_bytes(255, 2, "little")
+    assert to_bytes(-1, 2, "big", True) == b'\xff\xff', to_bytes(-1, 2, "big", True)
+    assert to_bytes(0, 1, "big") == b'\x00', to_bytes(0, 1, "big")
+    # test with a value that does not fit in 64 bits
+    assert to_bytes(10**30, 16, "big") == b'\x00\x00\x00\x0c\x9f,\x9c\xd0Ft\xed\xea@\x00\x00\x00', to_bytes(10**30, 16, "big")
+    # unsigned, too large for 1 byte
+    with assertRaises(OverflowError):
+        to_bytes(256, 1, "big")
+    # signed, too small for 1 byte
+    with assertRaises(OverflowError):
+        to_bytes(-129, 1, "big", True)
+    # signed, too large for 1 byte
+    with assertRaises(OverflowError):
+        to_bytes(128, 1, "big", True)
+    # invalid byteorder
+    with assertRaises(ValueError):
+        to_bytes(1, 1, "middle")
+    # negative length
+    with assertRaises(ValueError):
+        to_bytes(1, -1, "big")