Commit c56898a (parent: 099fc68)

Opt for always correct readall implementation

1 file changed: src/isal/isal_zlibmodule.c (+30 -55)
--- a/src/isal/isal_zlibmodule.c
+++ b/src/isal/isal_zlibmodule.c
@@ -1658,73 +1658,48 @@ GzipReader_seek(GzipReader *self, PyObject *args, PyObject *kwargs)
 }
 
 static PyObject *
-GzipReader_readall(GzipReader *self, PyObject *Py_UNUSED(ignore))
+GzipReader_readall(GzipReader *self, PyObject *Py_UNUSED(ignore))
 {
-    /* Pretty standard pattern: create a lot of bytes objects, stuff them in
-       a list, and join them.
-       Optimizations:
-       - Do not create a list but use static array and keep track of the
-         number of bytes objects.
-       - Start reading DEF_BUF_SIZE (16k) and increase by 2x.
-       - The static array contains 48 slots. The 48th chunk will have size
-         2 ** 47 * 16k. That is 2 million TB. That should be quite future proof.
-       - Since we can keep track of the size while creating the chunks, there
-         is no need to go over all the bytes objects again to calculate the
-         total size. (This is what _PyBytes_Join does internally).
-       - If there is only one item, return that one.
-    */
-    Py_ssize_t chunk_size = DEF_BUF_SIZE;
-    static PyObject *chunk_list[48];
-    size_t number_of_chunks = 0;
-    size_t total_size = 0;
-    PyObject *ret = NULL;
+    /* Try to consume the entire buffer without too much overallocation */
+    Py_ssize_t chunk_size = self->buffer_size * 4;
+    PyObject *chunk_list = PyList_New(0);
+    if (chunk_list == NULL) {
+        return NULL;
+    }
     while (1) {
         PyObject *chunk = PyBytes_FromStringAndSize(NULL, chunk_size);
         if (chunk == NULL) {
-            goto readall_finish;
+            Py_DECREF(chunk_list);
+            return NULL;
         }
         ssize_t written_size = GzipReader_read_into_buffer(
             self, (uint8_t *)PyBytes_AS_STRING(chunk), chunk_size);
         if (written_size < 0) {
             Py_DECREF(chunk);
-            goto readall_finish;
+            Py_DECREF(chunk_list);
+            return NULL;
         }
-        total_size += written_size;
-
-        if (written_size < chunk_size) {
-            // Reached the end, resize the smaller chunk
-            if (_PyBytes_Resize(&chunk, written_size) < 0) {
-                goto readall_finish;
-            }
-            chunk_list[number_of_chunks] = chunk;
-            number_of_chunks += 1;
+        if (written_size == 0) {
             break;
         }
-        chunk_list[number_of_chunks] = chunk;
-        number_of_chunks += 1;
-        chunk_size *= 2;
-    }
-    if (number_of_chunks == 1) {
-        // No need for an intermediate result. Return immediately.
-        return chunk_list[0];
-    }
-    ret = PyBytes_FromStringAndSize(NULL, total_size);
-    if (ret == NULL) {
-        goto readall_finish;
-    }
-    char *ret_ptr = PyBytes_AS_STRING(ret);
-    chunk_size = DEF_BUF_SIZE;
-    for (size_t i=0; i < number_of_chunks; i++) {
-        PyObject *chunk = chunk_list[i];
-        Py_ssize_t chunk_size = PyBytes_GET_SIZE(chunk);
-        memcpy(ret_ptr, PyBytes_AS_STRING(chunk), chunk_size);
-        ret_ptr += chunk_size;
-    }
-readall_finish:
-    for (size_t i=0; i < number_of_chunks; i++) {
-        Py_DECREF(chunk_list[i]);
-    }
-    return ret;
+        if (_PyBytes_Resize(&chunk, written_size) < 0) {
+            Py_DECREF(chunk_list);
+            return NULL;
+        }
+        if (PyList_Append(chunk_list, chunk) < 0) {
+            Py_DECREF(chunk);
+            Py_DECREF(chunk_list);
+            return NULL;
+        }
+    }
+    PyObject *empty_bytes = PyBytes_FromStringAndSize(NULL, 0);
+    if (empty_bytes == NULL) {
+        Py_DECREF(chunk_list);
+        return NULL;
+    }
+    PyObject *ret = _PyBytes_Join(empty_bytes, chunk_list);
+    Py_DECREF(empty_bytes);
+    return ret;
 }
 
 static PyObject *
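
Note: the replacement settles on the stock CPython idiom that the deleted comment alluded to: read fixed-size chunks into uninitialized bytes objects, collect them in a list, and join once at the end, the C equivalent of b"".join(chunks), where _PyBytes_Join sums the sizes in one pass and copies each chunk exactly once. Below is a minimal self-contained sketch of that idiom, not the commit's code: readall_sketch and read_some are hypothetical names, with read_some standing in for GzipReader_read_into_buffer, and the reference counting is spelled out in full since neither PyList_Append nor _PyBytes_Join steals a reference.

#include <Python.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical producer standing in for GzipReader_read_into_buffer():
   fills buf with up to size bytes and returns the count, 0 at EOF.
   Reading stdin here just keeps the sketch self-contained. */
static Py_ssize_t
read_some(uint8_t *buf, Py_ssize_t size)
{
    return (Py_ssize_t)fread(buf, 1, (size_t)size, stdin);
}

static PyObject *
readall_sketch(Py_ssize_t chunk_size)
{
    PyObject *chunk_list = PyList_New(0);
    if (chunk_list == NULL) {
        return NULL;
    }
    while (1) {
        /* Allocate an uninitialized bytes object and let the producer
           write directly into it, avoiding an intermediate copy. */
        PyObject *chunk = PyBytes_FromStringAndSize(NULL, chunk_size);
        if (chunk == NULL) {
            goto error;
        }
        Py_ssize_t n = read_some(
            (uint8_t *)PyBytes_AS_STRING(chunk), chunk_size);
        if (n < 0) {
            Py_DECREF(chunk);
            goto error;
        }
        if (n == 0) {
            Py_DECREF(chunk);  /* EOF: drop the unused chunk */
            break;
        }
        /* Shrink a partially filled chunk to the bytes actually read.
           On failure _PyBytes_Resize frees the chunk and sets it NULL. */
        if (_PyBytes_Resize(&chunk, n) < 0) {
            goto error;
        }
        /* PyList_Append adds its own reference, so ours must be dropped
           whether or not the append succeeds. */
        if (PyList_Append(chunk_list, chunk) < 0) {
            Py_DECREF(chunk);
            goto error;
        }
        Py_DECREF(chunk);
    }
    /* Equivalent of b"".join(chunk_list): one pass to total the sizes,
       one memcpy per chunk into the final bytes object. */
    PyObject *empty_bytes = PyBytes_FromStringAndSize(NULL, 0);
    if (empty_bytes == NULL) {
        goto error;
    }
    PyObject *ret = _PyBytes_Join(empty_bytes, chunk_list);
    Py_DECREF(empty_bytes);
    Py_DECREF(chunk_list);
    return ret;
error:
    Py_DECREF(chunk_list);
    return NULL;
}

Compared with the deleted version, the trade is deliberate: the doubling scheme avoided list overhead but capped output at 48 chunks (the 48th being 2^47 * 16 KiB, about 2 million TB) and duplicated the size bookkeeping that _PyBytes_Join already performs, so the list-and-join form has fewer hand-written paths to get wrong, which matches the "always correct" framing of the commit message.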
