Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
ab75317
[mypyc] feat: new primitives for `bytes.rjust` and `bytes.ljust`
BobTheBuidler Sep 12, 2025
02d76a5
fix: tests
BobTheBuidler Sep 12, 2025
6453373
Update bytes_ops.c
BobTheBuidler Sep 12, 2025
0c33019
fix: tests
BobTheBuidler Sep 12, 2025
2baf425
add headers
BobTheBuidler Sep 12, 2025
350db60
deatil asserts
BobTheBuidler Sep 12, 2025
ee1a76e
fix ir
BobTheBuidler Sep 12, 2025
e5d6e43
fix run tests
BobTheBuidler Sep 12, 2025
b936c3c
Update bytes_ops.c
BobTheBuidler Sep 12, 2025
c5ed8a4
Update bytes_ops.py
BobTheBuidler Sep 12, 2025
3db102b
Update bytes_ops.c
BobTheBuidler Sep 12, 2025
4fd876f
fix pyssizet
BobTheBuidler Sep 12, 2025
b82c603
fix headers
BobTheBuidler Sep 12, 2025
a106445
fix: ;
BobTheBuidler Sep 12, 2025
a44e9b5
fix ir
BobTheBuidler Sep 12, 2025
10e5093
drop dupe tests
BobTheBuidler Sep 12, 2025
600b276
Update bytes_ops.c
BobTheBuidler Sep 12, 2025
eaee990
Update CPy.h
BobTheBuidler Sep 12, 2025
3e66f36
Update bytes_ops.py
BobTheBuidler Sep 12, 2025
0edcb0d
Update bytes_ops.c
BobTheBuidler Sep 12, 2025
1be3982
Update irbuild-bytes.test
BobTheBuidler Sep 12, 2025
c1caaa2
optimize c funcs
BobTheBuidler Sep 12, 2025
b28b6d1
fix ir
BobTheBuidler Sep 12, 2025
b63c06d
Merge branch 'master' into just
BobTheBuidler Sep 30, 2025
5aee840
Merge branch 'master' into just
BobTheBuidler Oct 2, 2025
51fe159
Merge branch 'master' into just
BobTheBuidler Oct 4, 2025
b780cb8
Merge branch 'master' into just
BobTheBuidler Oct 10, 2025
1e50094
Merge branch 'master' into just
BobTheBuidler Oct 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions mypyc/lib-rt/CPy.h
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,10 @@ CPyTagged CPyBytes_GetItem(PyObject *o, CPyTagged index);
PyObject *CPyBytes_Concat(PyObject *a, PyObject *b);
PyObject *CPyBytes_Join(PyObject *sep, PyObject *iter);
CPyTagged CPyBytes_Ord(PyObject *obj);
// Primitives for bytes.ljust() and bytes.rjust(). The DefaultFill variants pad
// with an ASCII space; the CustomFill variants take the fill byte as an extra
// argument. Each returns a new reference, or NULL with an exception set.
PyObject *CPyBytes_LjustDefaultFill(PyObject *self, CPyTagged width);
PyObject *CPyBytes_RjustDefaultFill(PyObject *self, CPyTagged width);
PyObject *CPyBytes_LjustCustomFill(PyObject *self, CPyTagged width, PyObject *fillbyte);
PyObject *CPyBytes_RjustCustomFill(PyObject *self, CPyTagged width, PyObject *fillbyte);


int CPyBytes_Compare(PyObject *left, PyObject *right);
Expand Down
95 changes: 95 additions & 0 deletions mypyc/lib-rt/bytes_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include <Python.h>
#include "CPy.h"
#include <string.h>

// Returns -1 on error, 0 on inequality, 1 on equality.
//
Expand Down Expand Up @@ -162,3 +163,97 @@ CPyTagged CPyBytes_Ord(PyObject *obj) {
PyErr_SetString(PyExc_TypeError, "ord() expects a character");
return CPY_INT_TAG;
}


// Shared implementation behind the bytes.ljust() / bytes.rjust() primitives.
//
//   self          receiver; annotated as bytes, but at runtime it may also be a
//                 bytearray or an instance of a bytes subclass
//   width         requested minimum length, as a tagged integer
//   fillbyte      padding argument, or NULL to pad with an ASCII space
//   left_justify  nonzero for ljust (padding goes on the right), zero for
//                 rjust (padding goes on the left)
//
// Returns a new reference, or NULL with an exception set.
static PyObject *CPyBytes_Justify(PyObject *self, CPyTagged width,
                                  PyObject *fillbyte, int left_justify) {
    // The hand-written path is only valid when the receiver is exactly bytes
    // (so the method cannot have been overridden and the buffer layout is
    // known) and the fill, if given, is exactly a one-byte bytes object.
    int use_fast_path = PyBytes_CheckExact(self)
        && (fillbyte == NULL
            || (PyBytes_CheckExact(fillbyte) && PyBytes_GET_SIZE(fillbyte) == 1));

    if (!use_fast_path) {
        // Everything else (bytearray, subclasses, bytearray fill, invalid
        // fill) goes through the regular method call, so the result type and
        // any TypeError are exactly what the interpreter would produce.
        PyObject *width_obj = CPyTagged_AsObject(width);
        if (width_obj == NULL) {
            return NULL;
        }
        const char *method = left_justify ? "ljust" : "rjust";
        PyObject *generic;
        if (fillbyte == NULL) {
            generic = PyObject_CallMethod(self, method, "(O)", width_obj);
        } else {
            generic = PyObject_CallMethod(self, method, "(OO)", width_obj, fillbyte);
        }
        Py_DECREF(width_obj);
        return generic;
    }

    Py_ssize_t target = CPyTagged_AsSsize_t(width);
    if (target == -1 && PyErr_Occurred()) {
        // Width does not fit in Py_ssize_t; propagate the conversion error
        // instead of treating -1 as "no padding needed".
        return NULL;
    }

    Py_ssize_t len = PyBytes_GET_SIZE(self);
    if (target <= len) {
        // Nothing to pad: exact bytes objects are immutable, so hand back the
        // receiver itself.
        Py_INCREF(self);
        return self;
    }

    unsigned char fill = fillbyte != NULL
        ? (unsigned char)PyBytes_AS_STRING(fillbyte)[0]
        : (unsigned char)' ';
    Py_ssize_t pad = target - len;

    PyObject *result = PyBytes_FromStringAndSize(NULL, target);
    if (result == NULL) {
        return NULL;
    }
    char *out = PyBytes_AS_STRING(result);
    const char *src = PyBytes_AS_STRING(self);
    if (left_justify) {
        memcpy(out, src, (size_t)len);
        memset(out + len, fill, (size_t)pad);
    } else {
        memset(out, fill, (size_t)pad);
        memcpy(out + pad, src, (size_t)len);
    }
    return result;
}


// bytes.rjust(width): right-justify, padding on the left with spaces.
PyObject *CPyBytes_RjustDefaultFill(PyObject *self, CPyTagged width) {
    return CPyBytes_Justify(self, width, NULL, 0);
}


// bytes.rjust(width, fillbyte): right-justify, padding on the left with fillbyte.
PyObject *CPyBytes_RjustCustomFill(PyObject *self, CPyTagged width, PyObject *fillbyte) {
    return CPyBytes_Justify(self, width, fillbyte, 0);
}


// bytes.ljust(width): left-justify, padding on the right with spaces.
PyObject *CPyBytes_LjustDefaultFill(PyObject *self, CPyTagged width) {
    return CPyBytes_Justify(self, width, NULL, 1);
}


// bytes.ljust(width, fillbyte): left-justify, padding on the right with fillbyte.
PyObject *CPyBytes_LjustCustomFill(PyObject *self, CPyTagged width, PyObject *fillbyte) {
    return CPyBytes_Justify(self, width, fillbyte, 1);
}
36 changes: 36 additions & 0 deletions mypyc/primitives/bytes_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,39 @@
c_function_name="CPyBytes_Ord",
error_kind=ERR_MAGIC,
)

# bytes.rjust(width)
# One-argument call form (receiver plus width). Lowered to the C function
# CPyBytes_RjustDefaultFill, which pads on the left with spaces.
method_op(
name="rjust",
arg_types=[bytes_rprimitive, int_rprimitive],
return_type=bytes_rprimitive,
c_function_name="CPyBytes_RjustDefaultFill",
error_kind=ERR_MAGIC,
)

# bytes.rjust(width, fillbyte)
# Two-argument call form (receiver, width, fill). Lowered to the C function
# CPyBytes_RjustCustomFill, which pads on the left with the given fill byte.
method_op(
name="rjust",
arg_types=[bytes_rprimitive, int_rprimitive, bytes_rprimitive],
return_type=bytes_rprimitive,
c_function_name="CPyBytes_RjustCustomFill",
error_kind=ERR_MAGIC,
)

# bytes.ljust(width)
# One-argument call form (receiver plus width). Lowered to the C function
# CPyBytes_LjustDefaultFill, which pads on the right with spaces.
method_op(
name="ljust",
arg_types=[bytes_rprimitive, int_rprimitive],
return_type=bytes_rprimitive,
c_function_name="CPyBytes_LjustDefaultFill",
error_kind=ERR_MAGIC,
)

# bytes.ljust(width, fillbyte)
# Two-argument call form (receiver, width, fill). Lowered to the C function
# CPyBytes_LjustCustomFill, which pads on the right with the given fill byte.
method_op(
name="ljust",
arg_types=[bytes_rprimitive, int_rprimitive, bytes_rprimitive],
return_type=bytes_rprimitive,
c_function_name="CPyBytes_LjustCustomFill",
error_kind=ERR_MAGIC,
)
2 changes: 2 additions & 0 deletions mypyc/test-data/fixtures/ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,8 @@ def __getitem__(self, i: slice) -> bytes: ...
def join(self, x: Iterable[object]) -> bytes: ...
def decode(self, encoding: str=..., errors: str=...) -> str: ...
def __iter__(self) -> Iterator[int]: ...
# Padding methods; fillchar may be bytes or bytearray and defaults to b" ".
def ljust(self, width: int, fillchar: bytes | bytearray = b" ") -> bytes: ...
def rjust(self, width: int, fillchar: bytes | bytearray = b" ") -> bytes: ...

class bytearray:
@overload
Expand Down
42 changes: 42 additions & 0 deletions mypyc/test-data/irbuild-bytes.test
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,45 @@ L2:
L3:
keep_alive y
return r2

[case testBytesRjustDefault]
def f(b: bytes) -> bytes:
return b.rjust(6)
[out]
def f(b):
b, r0 :: bytes
L0:
r0 = CPyBytes_RjustDefaultFill(b, 12)
return r0

[case testBytesRjustCustom]
def f(b: bytes) -> bytes:
return b.rjust(8, b'0')
[out]
def f(b):
b, r0, r1 :: bytes
L0:
r0 = b'0'
r1 = CPyBytes_RjustCustomFill(b, 16, r0)
return r1

[case testBytesLjustDefault]
def f(b: bytes) -> bytes:
return b.ljust(7)
[out]
def f(b):
b, r0 :: bytes
L0:
r0 = CPyBytes_LjustDefaultFill(b, 14)
return r0

[case testBytesLjustCustom]
def f(b: bytes) -> bytes:
return b.ljust(10, b'_')
[out]
def f(b):
b, r0, r1 :: bytes
L0:
r0 = b'_'
r1 = CPyBytes_LjustCustomFill(b, 20, r0)
return r1
40 changes: 40 additions & 0 deletions mypyc/test-data/run-bytes.test
Original file line number Diff line number Diff line change
Expand Up @@ -401,3 +401,43 @@ def test_optional_ne() -> None:
assert ne_b_b_opt(b'x', b'y')
assert ne_b_b_opt(b'y', b'x')
assert ne_b_b_opt(b'x', None)

[case testBytesRjustLjust]
from testutil import assertRaises

def rjust_bytes(b: bytes, width: int, fill: bytes = b' ') -> bytes:
    # Thin wrapper so the tests go through a statically typed bytes.rjust call;
    # the fill is always forwarded explicitly.
    padded = b.rjust(width, fill)
    return padded

def ljust_bytes(b: bytes, width: int, fill: bytes = b' ') -> bytes:
    # Thin wrapper so the tests go through a statically typed bytes.ljust call;
    # the fill is always forwarded explicitly.
    padded = b.ljust(width, fill)
    return padded

def test_rjust_with_default_fill() -> None:
    # Padding width is (requested width - len(b)); the expected literals must
    # carry exactly that many leading spaces.
    assert rjust_bytes(b'abc', 6) == b'   abc', rjust_bytes(b'abc', 6)
    assert rjust_bytes(b'abc', 3) == b'abc', rjust_bytes(b'abc', 3)
    assert rjust_bytes(b'abc', 2) == b'abc', rjust_bytes(b'abc', 2)
    assert rjust_bytes(b'', 4) == b'    ', rjust_bytes(b'', 4)
    # rjust_bytes always forwards an explicit fill argument, so also call the
    # one-argument form directly to exercise the default-fill code path.
    data = b'abc'
    assert data.rjust(6) == b'   abc', data.rjust(6)
    assert data.rjust(2) == b'abc', data.rjust(2)

def test_rjust_with_custom_fill() -> None:
    # (width, fill, expected) triples, all applied to b'abc'.
    cases = [
        (6, b'0', b'000abc'),
        (5, b'_', b'__abc'),
        (3, b'X', b'abc'),
    ]
    for width, fill, expected in cases:
        actual = rjust_bytes(b'abc', width, fill)
        assert actual == expected, actual

def test_ljust_with_default_fill() -> None:
    # Padding width is (requested width - len(b)); the expected literals must
    # carry exactly that many trailing spaces.
    assert ljust_bytes(b'abc', 6) == b'abc   ', ljust_bytes(b'abc', 6)
    assert ljust_bytes(b'abc', 3) == b'abc', ljust_bytes(b'abc', 3)
    assert ljust_bytes(b'abc', 2) == b'abc', ljust_bytes(b'abc', 2)
    assert ljust_bytes(b'', 4) == b'    ', ljust_bytes(b'', 4)
    # ljust_bytes always forwards an explicit fill argument, so also call the
    # one-argument form directly to exercise the default-fill code path.
    data = b'abc'
    assert data.ljust(6) == b'abc   ', data.ljust(6)
    assert data.ljust(2) == b'abc', data.ljust(2)

def test_ljust_with_custom_fill() -> None:
    # (width, fill, expected) triples, all applied to b'abc'.
    cases = [
        (6, b'0', b'abc000'),
        (5, b'_', b'abc__'),
        (3, b'X', b'abc'),
    ]
    for width, fill, expected in cases:
        actual = ljust_bytes(b'abc', width, fill)
        assert actual == expected, actual

def test_edge_cases() -> None:
    # A width of zero never pads, whichever side is justified.
    right_unpadded = rjust_bytes(b'abc', 0)
    assert right_unpadded == b'abc', right_unpadded
    left_unpadded = ljust_bytes(b'abc', 0)
    assert left_unpadded == b'abc', left_unpadded

    # The fill must be exactly one byte long: empty and two-byte fills are
    # both rejected with TypeError.
    with assertRaises(TypeError):
        rjust_bytes(b'abc', 5, b'')
    with assertRaises(TypeError):
        ljust_bytes(b'abc', 5, b'12')