@@ -1639,46 +1639,68 @@ IGzipReader_seek(IGzipReader *self, PyObject *args, PyObject *kwargs)
1639
1639
static PyObject *
1640
1640
IGzipReader_readall (IGzipReader * self , PyObject * Py_UNUSED (ignore ))
1641
1641
{
1642
- /* Try to consume the entire buffer without too much overallocation */
1643
- Py_ssize_t chunk_size = self -> buffer_size * 4 ;
1644
- PyObject * chunk_list = PyList_New (0 );
1645
- if (chunk_list == NULL ) {
1646
- return NULL ;
1647
- }
1642
+ /* Pretty standard pattern: create a lot of bytes objects, stuff them in
1643
+ a list, and join them.
1644
+ Optimizations:
1645
+ - Do not create a list but use a static array and keep track of the
1646
+ number of bytes objects.
1647
+ - Start reading DEF_BUF_SIZE (16k) and increase by 2x.
1648
+ - The static array contains 48 slots. The 48th chunk will have size
1649
+ 2 ** 47 * 16k. That is 2 million TB. That should be quite future proof.
1650
+ - Since we can keep track of the size while creating the chunks, there
1651
+ is no need to go over all the bytes objects again to calculate the
1652
+ total size. (This is what _PyBytes_Join does internally).
1653
+ - If there is only one item, return that one.
1654
+ */
1655
+ Py_ssize_t chunk_size = DEF_BUF_SIZE ;
1656
+ static PyObject * chunk_list [48 ];
1657
+ size_t number_of_chunks = 0 ;
1658
+ size_t total_size = 0 ;
1659
+ PyObject * ret = NULL ;
1648
1660
while (1 ) {
1649
1661
PyObject * chunk = PyBytes_FromStringAndSize (NULL , chunk_size );
1650
1662
if (chunk == NULL ) {
1651
- Py_DECREF (chunk_list );
1652
- return NULL ;
1663
+ goto readall_finish ;
1653
1664
}
1654
1665
ssize_t written_size = IGzipReader_read_into_buffer (
1655
1666
self , (uint8_t * )PyBytes_AS_STRING (chunk ), chunk_size );
1656
1667
if (written_size < 0 ) {
1657
1668
Py_DECREF (chunk );
1658
- Py_DECREF (chunk_list );
1659
- return NULL ;
1669
+ goto readall_finish ;
1660
1670
}
1661
- if (written_size == 0 ) {
1671
+ total_size += written_size ;
1672
+ chunk_list [number_of_chunks ] = chunk ;
1673
+ number_of_chunks += 1 ;
1674
+ chunk_size *= 2 ;
1675
+ if (written_size < chunk_size ) {
1676
+ // Reached the end, resize the smaller chunk
1677
+ if (_PyBytes_Resize (& chunk , written_size ) < 0 ) {
1678
+ goto readall_finish ;
1679
+ }
1662
1680
break ;
1663
1681
}
1664
- if (_PyBytes_Resize (& chunk , written_size ) < 0 ) {
1665
- Py_DECREF (chunk_list );
1666
- return NULL ;
1667
- }
1668
- if (PyList_Append (chunk_list , chunk ) < 0 ) {
1669
- Py_DECREF (chunk );
1670
- Py_DECREF (chunk_list );
1671
- return NULL ;
1672
- }
1673
1682
}
1674
- PyObject * empty_bytes = PyBytes_FromStringAndSize (NULL , 0 );
1675
- if (empty_bytes == NULL ) {
1676
- Py_DECREF (chunk_list );
1677
- return NULL ;
1683
+ if (number_of_chunks == 1 ) {
1684
+ // No need for an intermediate result. Return immediately.
1685
+ return chunk_list [0 ];
1686
+ }
1687
+ ret = PyBytes_FromStringAndSize (NULL , total_size );
1688
+ if (ret == NULL ) {
1689
+ goto readall_finish ;
1690
+ }
1691
+ char * ret_ptr = PyBytes_AS_STRING (ret );
1692
+ chunk_size = DEF_BUF_SIZE ;
1693
+ for (size_t i = 0 ; i < number_of_chunks ; i ++ ) {
1694
+ PyObject * chunk = chunk_list [i ];
1695
+ Py_ssize_t chunk_size = PyBytes_GET_SIZE (chunk );
1696
+ memcpy (ret , PyBytes_AS_STRING (chunk ), chunk_size );
1697
+ ret_ptr += chunk_size ;
1698
+ }
1699
+ readall_finish :
1700
+ for (size_t i = 0 ; i < number_of_chunks ; i ++ ) {
1701
+ Py_DECREF (chunk_list [i ]);
1678
1702
}
1679
- PyObject * ret = _PyBytes_Join (empty_bytes , chunk_list );
1680
- Py_DECREF (empty_bytes );
1681
- return ret ;
1703
+ return ret ;
1682
1704
}
1683
1705
1684
1706
static PyObject *
0 commit comments