Skip to content

Commit e265ce8

Browse files
authored
gh-139871: Optimize small takes in bytearray.take_bytes (GH-141741)
When less than half of the buffer is taken, just copy that small part out rather than doing a big alloc, a memmove, and a big shrink.
1 parent a35c683 commit e265ce8

File tree

2 files changed

+39
-1
lines changed

2 files changed

+39
-1
lines changed

Lib/test/test_bytes.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1524,6 +1524,32 @@ def test_take_bytes(self):
15241524
self.assertRaises(BufferError, ba.take_bytes)
15251525
self.assertEqual(ba.take_bytes(), b'abc')
15261526

1527+
@support.cpython_only # tests an implementation detail
1528+
def test_take_bytes_optimization(self):
1529+
# Validate optimization around taking lots of little chunks out of a
1530+
# much bigger buffer. Save work by only copying a little rather than
1531+
# moving a lot.
1532+
ba = bytearray(b'abcdef' + b'0' * 1000)
1533+
start_alloc = ba.__alloc__()
1534+
1535+
# Take two bytes at a time, checking alloc doesn't change.
1536+
self.assertEqual(ba.take_bytes(2), b'ab')
1537+
self.assertEqual(ba.__alloc__(), start_alloc)
1538+
self.assertEqual(len(ba), 4 + 1000)
1539+
self.assertEqual(ba.take_bytes(2), b'cd')
1540+
self.assertEqual(ba.__alloc__(), start_alloc)
1541+
self.assertEqual(len(ba), 2 + 1000)
1542+
self.assertEqual(ba.take_bytes(2), b'ef')
1543+
self.assertEqual(ba.__alloc__(), start_alloc)
1544+
self.assertEqual(len(ba), 0 + 1000)
1545+
self.assertEqual(ba.__alloc__(), start_alloc)
1546+
1547+
# Take over half, alloc shrinks to exact size.
1548+
self.assertEqual(ba.take_bytes(501), b'0' * 501)
1549+
self.assertEqual(len(ba), 499)
1550+
bytes_header_size = sys.getsizeof(b'')
1551+
self.assertEqual(ba.__alloc__(), 499 + bytes_header_size)
1552+
15271553
def test_setitem(self):
15281554
def setitem_as_mapping(b, i, val):
15291555
b[i] = val

Objects/bytearrayobject.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1547,8 +1547,20 @@ bytearray_take_bytes_impl(PyByteArrayObject *self, PyObject *n)
15471547
return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
15481548
}
15491549

1550-
// Copy remaining bytes to a new bytes.
15511550
Py_ssize_t remaining_length = size - to_take;
1551+
// optimization: If taking less than leaving, just copy the small to_take
1552+
// portion out and move ob_start.
1553+
if (to_take < remaining_length) {
1554+
PyObject *ret = PyBytes_FromStringAndSize(self->ob_start, to_take);
1555+
if (ret == NULL) {
1556+
return NULL;
1557+
}
1558+
self->ob_start += to_take;
1559+
Py_SET_SIZE(self, remaining_length);
1560+
return ret;
1561+
}
1562+
1563+
// Copy remaining bytes to a new bytes.
15521564
PyObject *remaining = PyBytes_FromStringAndSize(self->ob_start + to_take,
15531565
remaining_length);
15541566
if (remaining == NULL) {

0 commit comments

Comments
 (0)