Skip to content

Commit 3e7d539

Browse files
committed
Try a faster readall implementation
1 parent aa71e40 commit 3e7d539

File tree

1 file changed

+49
-27
lines changed

1 file changed

+49
-27
lines changed

src/isal/isal_zlibmodule.c

Lines changed: 49 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1639,46 +1639,68 @@ IGzipReader_seek(IGzipReader *self, PyObject *args, PyObject *kwargs)
16391639
static PyObject *
16401640
IGzipReader_readall(IGzipReader *self, PyObject *Py_UNUSED(ignore))
16411641
{
1642-
/* Try to consume the entire buffer without too much overallocation */
1643-
Py_ssize_t chunk_size = self->buffer_size * 4;
1644-
PyObject *chunk_list = PyList_New(0);
1645-
if (chunk_list == NULL) {
1646-
return NULL;
1647-
}
1642+
/* Pretty standard pattern: create a lot of bytes objects, stuff them in
1643+
a list, and join them.
1644+
Optimizations:
1645+
- Do not create a list but use a static array and keep track of the
1646+
number of bytes objects.
1647+
- Start reading DEF_BUF_SIZE (16k) and increase by 2x.
1648+
- The static array contains 48 slots. The 48th chunk will have size
1649+
2 ** 47 * 16k. That is 2 million TB. That should be quite future proof.
1650+
- Since we can keep track of the size while creating the chunks, there
1651+
is no need to go over all the bytes objects again to calculate the
1652+
total size. (This is what _PyBytes_Join does internally).
1653+
- If there is only one item, return that one.
1654+
*/
1655+
Py_ssize_t chunk_size = DEF_BUF_SIZE;
1656+
static PyObject *chunk_list[48];
1657+
size_t number_of_chunks = 0;
1658+
size_t total_size = 0;
1659+
PyObject *ret = NULL;
16481660
while (1) {
16491661
PyObject *chunk = PyBytes_FromStringAndSize(NULL, chunk_size);
16501662
if (chunk == NULL) {
1651-
Py_DECREF(chunk_list);
1652-
return NULL;
1663+
goto readall_finish;
16531664
}
16541665
ssize_t written_size = IGzipReader_read_into_buffer(
16551666
self, (uint8_t *)PyBytes_AS_STRING(chunk), chunk_size);
16561667
if (written_size < 0) {
16571668
Py_DECREF(chunk);
1658-
Py_DECREF(chunk_list);
1659-
return NULL;
1669+
goto readall_finish;
16601670
}
1661-
if (written_size == 0) {
1671+
total_size += written_size;
1672+
chunk_list[number_of_chunks] = chunk;
1673+
number_of_chunks += 1;
1674+
chunk_size *= 2;
1675+
if (written_size < chunk_size) {
1676+
// Reached the end, resize the smaller chunk
1677+
if (_PyBytes_Resize(&chunk, written_size) < 0) {
1678+
goto readall_finish;
1679+
}
16621680
break;
16631681
}
1664-
if (_PyBytes_Resize(&chunk, written_size) < 0) {
1665-
Py_DECREF(chunk_list);
1666-
return NULL;
1667-
}
1668-
if (PyList_Append(chunk_list, chunk) < 0) {
1669-
Py_DECREF(chunk);
1670-
Py_DECREF(chunk_list);
1671-
return NULL;
1672-
}
16731682
}
1674-
PyObject *empty_bytes = PyBytes_FromStringAndSize(NULL, 0);
1675-
if (empty_bytes == NULL) {
1676-
Py_DECREF(chunk_list);
1677-
return NULL;
1683+
if (number_of_chunks == 1) {
1684+
// No need for an intermediate result. Return immediately.
1685+
return chunk_list[0];
1686+
}
1687+
ret = PyBytes_FromStringAndSize(NULL, total_size);
1688+
if (ret == NULL) {
1689+
goto readall_finish;
1690+
}
1691+
char *ret_ptr = PyBytes_AS_STRING(ret);
1692+
chunk_size = DEF_BUF_SIZE;
1693+
for (size_t i=0; i < number_of_chunks; i++) {
1694+
PyObject *chunk = chunk_list[i];
1695+
Py_ssize_t chunk_size = PyBytes_GET_SIZE(chunk);
1696+
memcpy(ret, PyBytes_AS_STRING(chunk), chunk_size);
1697+
ret_ptr += chunk_size;
1698+
}
1699+
readall_finish:
1700+
for (size_t i=0; i < number_of_chunks; i++) {
1701+
Py_DECREF(chunk_list[i]);
16781702
}
1679-
PyObject *ret = _PyBytes_Join(empty_bytes, chunk_list);
1680-
Py_DECREF(empty_bytes);
1681-
return ret;
1703+
return ret;
16821704
}
16831705

16841706
static PyObject *

0 commit comments

Comments
 (0)