@@ -1658,73 +1658,48 @@ GzipReader_seek(GzipReader *self, PyObject *args, PyObject *kwargs)
1658
1658
}
1659
1659
1660
1660
static PyObject *
1661
- GzipReader_readall (GzipReader * self , PyObject * Py_UNUSED (ignore ))
1661
+ GzipReader_readall (GzipReader * self , PyObject * Py_UNUSED (ignore ))
1662
1662
{
1663
- /* Pretty standard pattern: create a lot of bytes objects, stuff them in
1664
- a list, and join them.
1665
- Optimizations:
1666
- - Do not create a list but use static array and keep track of the
1667
- number of bytes objects.
1668
- - Start reading DEF_BUF_SIZE (16k) and increase by 2x.
1669
- - The static array contains 48 slots. The 48th chunk will have size
1670
- 2 ** 47 * 16k. That is 2 million TB. That should be quite future proof.
1671
- - Since we kan keep track of the size while creating the chunks, there
1672
- is no need to go over all the bytes objects again to calculate the
1673
- total size. (This is what _PyBytes_Join does internally).
1674
- - If there is only one item, return that one.
1675
- */
1676
- Py_ssize_t chunk_size = DEF_BUF_SIZE ;
1677
- static PyObject * chunk_list [48 ];
1678
- size_t number_of_chunks = 0 ;
1679
- size_t total_size = 0 ;
1680
- PyObject * ret = NULL ;
1663
+ /* Try to consume the entire buffer without too much overallocation */
1664
+ Py_ssize_t chunk_size = self -> buffer_size * 4 ;
1665
+ PyObject * chunk_list = PyList_New (0 );
1666
+ if (chunk_list == NULL ) {
1667
+ return NULL ;
1668
+ }
1681
1669
while (1 ) {
1682
1670
PyObject * chunk = PyBytes_FromStringAndSize (NULL , chunk_size );
1683
1671
if (chunk == NULL ) {
1684
- goto readall_finish ;
1672
+ Py_DECREF (chunk_list );
1673
+ return NULL ;
1685
1674
}
1686
1675
ssize_t written_size = GzipReader_read_into_buffer (
1687
1676
self , (uint8_t * )PyBytes_AS_STRING (chunk ), chunk_size );
1688
1677
if (written_size < 0 ) {
1689
1678
Py_DECREF (chunk );
1690
- goto readall_finish ;
1679
+ Py_DECREF (chunk_list );
1680
+ return NULL ;
1691
1681
}
1692
- total_size += written_size ;
1693
-
1694
- if (written_size < chunk_size ) {
1695
- // Reached the end, resize the smaller chunk
1696
- if (_PyBytes_Resize (& chunk , written_size ) < 0 ) {
1697
- goto readall_finish ;
1698
- }
1699
- chunk_list [number_of_chunks ] = chunk ;
1700
- number_of_chunks += 1 ;
1682
+ if (written_size == 0 ) {
1701
1683
break ;
1702
1684
}
1703
- chunk_list [number_of_chunks ] = chunk ;
1704
- number_of_chunks += 1 ;
1705
- chunk_size *= 2 ;
1706
- }
1707
- if (number_of_chunks == 1 ) {
1708
- // No need for an intermediate result. Return immediately.
1709
- return chunk_list [0 ];
1710
- }
1711
- ret = PyBytes_FromStringAndSize (NULL , total_size );
1712
- if (ret == NULL ) {
1713
- goto readall_finish ;
1714
- }
1715
- char * ret_ptr = PyBytes_AS_STRING (ret );
1716
- chunk_size = DEF_BUF_SIZE ;
1717
- for (size_t i = 0 ; i < number_of_chunks ; i ++ ) {
1718
- PyObject * chunk = chunk_list [i ];
1719
- Py_ssize_t chunk_size = PyBytes_GET_SIZE (chunk );
1720
- memcpy (ret_ptr , PyBytes_AS_STRING (chunk ), chunk_size );
1721
- ret_ptr += chunk_size ;
1722
- }
1723
- readall_finish :
1724
- for (size_t i = 0 ; i < number_of_chunks ; i ++ ) {
1725
- Py_DECREF (chunk_list [i ]);
1726
- }
1727
- return ret ;
1685
+ if (_PyBytes_Resize (& chunk , written_size ) < 0 ) {
1686
+ Py_DECREF (chunk_list );
1687
+ return NULL ;
1688
+ }
1689
+ if (PyList_Append (chunk_list , chunk ) < 0 ) {
1690
+ Py_DECREF (chunk );
1691
+ Py_DECREF (chunk_list );
1692
+ return NULL ;
1693
+ }
1694
+ }
1695
+ PyObject * empty_bytes = PyBytes_FromStringAndSize (NULL , 0 );
1696
+ if (empty_bytes == NULL ) {
1697
+ Py_DECREF (chunk_list );
1698
+ return NULL ;
1699
+ }
1700
+ PyObject * ret = _PyBytes_Join (empty_bytes , chunk_list );
1701
+ Py_DECREF (empty_bytes );
1702
+ return ret ;
1728
1703
}
1729
1704
1730
1705
static PyObject *
0 commit comments