@@ -1440,166 +1440,171 @@ GzipReader_read_into_buffer(GzipReader *self, uint8_t *out_buffer, size_t out_bu
1440
1440
return -1 ;
1441
1441
}
1442
1442
Py_ssize_t bytes_written = 0 ;
1443
+ /* Outer loop is the file read in loop */
1443
1444
while (1 ) {
1444
- /* Allow escaping the GIL except when updating the buffer or when
1445
- throwing errors. This makes a big difference for BGZF format gzip
1446
- blocks. */
1445
+ uint8_t * current_pos = self -> current_pos ;
1446
+ uint8_t * buffer_end = self -> buffer_end ;
1447
+ /* Inner loop fills the out buffer, with multiple gzip blocks if
1448
+ necessary. Allow escaping the GIL except when throwing errors.
1449
+ This makes a big difference for BGZF format gzip blocks.
1450
+ Threads are blocked when the loop is exited. */
1447
1451
PyThreadState * _save ;
1448
1452
Py_UNBLOCK_THREADS
1449
- uint8_t * current_pos = self -> current_pos ;
1450
- uint8_t * buffer_end = self -> buffer_end ;
1451
- switch (self -> stream_phase ) {
1452
- size_t remaining ; // Must be before labels.
1453
- case GzipReader_HEADER :
1454
- GzipReader_read_header :
1455
- remaining = buffer_end - current_pos ;
1456
- if (remaining == 0 && self -> all_bytes_read ) {
1457
- // Reached EOF
1458
- self -> _size = self -> _pos ;
1459
- self -> current_pos = current_pos ;
1460
- Py_BLOCK_THREADS ;
1461
- return bytes_written ;
1462
- }
1463
- if ((remaining ) < 10 ) {
1464
- break ;
1465
- }
1466
- uint8_t magic1 = current_pos [0 ];
1467
- uint8_t magic2 = current_pos [1 ];
1468
-
1469
- if (!(magic1 == 0x1f && magic2 == 0x8b )) {
1470
- Py_BLOCK_THREADS ;
1471
- PyErr_Format (BadGzipFile ,
1472
- "Not a gzipped file (%R)" ,
1473
- PyBytes_FromStringAndSize ((char * )current_pos , 2 ));
1474
- return -1 ;
1475
- };
1476
- uint8_t method = current_pos [2 ];
1477
- if (method != 8 ) {
1478
- Py_BLOCK_THREADS ;
1479
- PyErr_SetString (BadGzipFile , "Unknown compression method" );
1480
- return -1 ;
1481
- }
1482
- uint8_t flags = current_pos [3 ];
1483
- self -> _last_mtime = * (uint32_t * )(current_pos + 4 );
1484
- // Skip XFL and header flag
1485
- uint8_t * header_cursor = current_pos + 10 ;
1486
- if (flags & FEXTRA ) {
1487
- // Read the extra field and discard it.
1488
- if (header_cursor + 2 >= buffer_end ) {
1453
+ while (1 ) {
1454
+ switch (self -> stream_phase ) {
1455
+ size_t remaining ; // Must be before labels.
1456
+ case GzipReader_HEADER :
1457
+ remaining = buffer_end - current_pos ;
1458
+ if (remaining == 0 && self -> all_bytes_read ) {
1459
+ // Reached EOF
1460
+ self -> _size = self -> _pos ;
1461
+ self -> current_pos = current_pos ;
1462
+ Py_BLOCK_THREADS ;
1463
+ return bytes_written ;
1464
+ }
1465
+ if ((remaining ) < 10 ) {
1489
1466
break ;
1490
1467
}
1491
- uint16_t flength = * (uint16_t * )header_cursor ;
1492
- header_cursor += 2 ;
1493
- if (header_cursor + flength >= buffer_end ) {
1494
- break ;
1468
+ uint8_t magic1 = current_pos [0 ];
1469
+ uint8_t magic2 = current_pos [1 ];
1470
+
1471
+ if (!(magic1 == 0x1f && magic2 == 0x8b )) {
1472
+ Py_BLOCK_THREADS ;
1473
+ PyErr_Format (BadGzipFile ,
1474
+ "Not a gzipped file (%R)" ,
1475
+ PyBytes_FromStringAndSize ((char * )current_pos , 2 ));
1476
+ return -1 ;
1477
+ };
1478
+ uint8_t method = current_pos [2 ];
1479
+ if (method != 8 ) {
1480
+ Py_BLOCK_THREADS ;
1481
+ PyErr_SetString (BadGzipFile , "Unknown compression method" );
1482
+ return -1 ;
1495
1483
}
1496
- header_cursor += flength ;
1497
- }
1498
- if (flags & FNAME ) {
1499
- header_cursor = memchr (header_cursor , 0 , buffer_end - header_cursor );
1500
- if (header_cursor == NULL ) {
1501
- break ;
1484
+ uint8_t flags = current_pos [3 ];
1485
+ self -> _last_mtime = * (uint32_t * )(current_pos + 4 );
1486
+ // Skip XFL and header flag
1487
+ uint8_t * header_cursor = current_pos + 10 ;
1488
+ if (flags & FEXTRA ) {
1489
+ // Read the extra field and discard it.
1490
+ if (header_cursor + 2 >= buffer_end ) {
1491
+ break ;
1492
+ }
1493
+ uint16_t flength = * (uint16_t * )header_cursor ;
1494
+ header_cursor += 2 ;
1495
+ if (header_cursor + flength >= buffer_end ) {
1496
+ break ;
1497
+ }
1498
+ header_cursor += flength ;
1502
1499
}
1503
- // skip over the 0 value;
1504
- header_cursor += 1 ;
1505
- }
1506
- if (flags & FCOMMENT ) {
1507
- header_cursor = memchr (header_cursor , 0 , buffer_end - header_cursor );
1508
- if (header_cursor == NULL ) {
1509
- break ;
1500
+ if (flags & FNAME ) {
1501
+ header_cursor = memchr (header_cursor , 0 , buffer_end - header_cursor );
1502
+ if (header_cursor == NULL ) {
1503
+ break ;
1504
+ }
1505
+ // skip over the 0 value;
1506
+ header_cursor += 1 ;
1507
+ }
1508
+ if (flags & FCOMMENT ) {
1509
+ header_cursor = memchr (header_cursor , 0 , buffer_end - header_cursor );
1510
+ if (header_cursor == NULL ) {
1511
+ break ;
1512
+ }
1513
+ // skip over the 0 value;
1514
+ header_cursor += 1 ;
1510
1515
}
1511
- // skip over the 0 value;
1512
- header_cursor += 1 ;
1513
- }
1514
- if (flags & FHCRC ) {
1515
- if (header_cursor + 2 >= buffer_end ) {
1516
+ if (flags & FHCRC ) {
1517
+ if (header_cursor + 2 >= buffer_end ) {
1518
+ break ;
1519
+ }
1520
+ uint16_t header_crc = * (uint16_t * )header_cursor ;
1521
+ uint16_t crc = crc32_gzip_refl (
1522
+ 0 , current_pos , header_cursor - current_pos ) & 0xFFFF ;
1523
+ if (header_crc != crc ) {
1524
+ Py_BLOCK_THREADS ;
1525
+ PyErr_Format (
1526
+ BadGzipFile ,
1527
+ "Corrupted gzip header. Checksums do not "
1528
+ "match: %04x != %04x" ,
1529
+ crc , header_crc
1530
+ );
1531
+ return -1 ;
1532
+ }
1533
+ header_cursor += 2 ;
1534
+ }
1535
+ current_pos = header_cursor ;
1536
+ isal_inflate_reset (& self -> state );
1537
+ self -> stream_phase = GzipReader_DEFLATE_BLOCK ;
1538
+ case GzipReader_DEFLATE_BLOCK :
1539
+ self -> state .next_in = current_pos ;
1540
+ self -> state .avail_in = buffer_end - current_pos ;
1541
+ self -> state .next_out = out_buffer ;
1542
+ self -> state .avail_out = out_buffer_size ;
1543
+ int ret ;
1544
+ ret = isal_inflate (& self -> state );
1545
+ if (ret != ISAL_DECOMP_OK ) {
1546
+ Py_BLOCK_THREADS ;
1547
+ isal_inflate_error (ret );
1548
+ return -1 ;
1549
+ }
1550
+ size_t current_bytes_written = self -> state .next_out - out_buffer ;
1551
+ bytes_written += current_bytes_written ;
1552
+ self -> _pos += current_bytes_written ;
1553
+ out_buffer = self -> state .next_out ;
1554
+ out_buffer_size = self -> state .avail_out ;
1555
+ current_pos = self -> state .next_in ;
1556
+ if (!(self -> state .block_state == ISAL_BLOCK_FINISH )) {
1557
+ if (self -> state .avail_out > 0 ) {
1558
+ break ;
1559
+ }
1560
+ self -> current_pos = current_pos ;
1561
+ Py_BLOCK_THREADS ;
1562
+ return bytes_written ;
1563
+ }
1564
+ current_pos -= bitbuffer_size (& self -> state );
1565
+ // Block done check trailer.
1566
+ self -> stream_phase = GzipReader_TRAILER ;
1567
+ case GzipReader_TRAILER :
1568
+ if (buffer_end - current_pos < 8 ) {
1516
1569
break ;
1517
1570
}
1518
- uint16_t header_crc = * (uint16_t * )header_cursor ;
1519
- uint16_t crc = crc32_gzip_refl (
1520
- 0 , current_pos , header_cursor - current_pos ) & 0xFFFF ;
1521
- if (header_crc != crc ) {
1571
+ uint32_t crc = * (uint32_t * )current_pos ;
1572
+ current_pos += 4 ;
1573
+ if (crc != self -> state .crc ) {
1522
1574
Py_BLOCK_THREADS ;
1523
1575
PyErr_Format (
1524
- BadGzipFile ,
1525
- "Corrupted gzip header. Checksums do not "
1526
- "match: %04x != %04x" ,
1527
- crc , header_crc
1576
+ BadGzipFile ,
1577
+ "CRC check failed %u != %u" ,
1578
+ crc , self -> state .crc
1528
1579
);
1529
1580
return -1 ;
1530
1581
}
1531
- header_cursor += 2 ;
1532
- }
1533
- current_pos = header_cursor ;
1534
- isal_inflate_reset (& self -> state );
1535
- self -> stream_phase = GzipReader_DEFLATE_BLOCK ;
1536
- case GzipReader_DEFLATE_BLOCK :
1537
- self -> state .next_in = current_pos ;
1538
- self -> state .avail_in = buffer_end - current_pos ;
1539
- self -> state .next_out = out_buffer ;
1540
- self -> state .avail_out = out_buffer_size ;
1541
- int ret ;
1542
- ret = isal_inflate (& self -> state );
1543
- if (ret != ISAL_DECOMP_OK ) {
1544
- Py_BLOCK_THREADS ;
1545
- isal_inflate_error (ret );
1546
- return -1 ;
1547
- }
1548
- size_t current_bytes_written = self -> state .next_out - out_buffer ;
1549
- bytes_written += current_bytes_written ;
1550
- self -> _pos += current_bytes_written ;
1551
- out_buffer = self -> state .next_out ;
1552
- out_buffer_size = self -> state .avail_out ;
1553
- current_pos = self -> state .next_in ;
1554
- if (!(self -> state .block_state == ISAL_BLOCK_FINISH )) {
1555
- if (self -> state .avail_out > 0 ) {
1556
- break ;
1582
+ uint32_t length = * (uint32_t * )current_pos ;
1583
+ current_pos += 4 ;
1584
+ if (length != self -> state .total_out ) {
1585
+ Py_BLOCK_THREADS ;
1586
+ PyErr_SetString (BadGzipFile , "Incorrect length of data produced" );
1587
+ return -1 ;
1557
1588
}
1558
- self -> current_pos = current_pos ;
1559
- Py_BLOCK_THREADS ;
1560
- return bytes_written ;
1561
- }
1562
- current_pos -= bitbuffer_size (& self -> state );
1563
- // Block done check trailer.
1564
- self -> stream_phase = GzipReader_TRAILER ;
1565
- case GzipReader_TRAILER :
1566
- if (buffer_end - current_pos < 8 ) {
1567
- break ;
1568
- }
1569
- uint32_t crc = * (uint32_t * )current_pos ;
1570
- current_pos += 4 ;
1571
- if (crc != self -> state .crc ) {
1572
- Py_BLOCK_THREADS ;
1573
- PyErr_Format (
1574
- BadGzipFile ,
1575
- "CRC check failed %u != %u" ,
1576
- crc , self -> state .crc
1577
- );
1578
- return -1 ;
1579
- }
1580
- uint32_t length = * (uint32_t * )current_pos ;
1581
- current_pos += 4 ;
1582
- if (length != self -> state .total_out ) {
1583
- Py_BLOCK_THREADS ;
1584
- PyErr_SetString (BadGzipFile , "Incorrect length of data produced" );
1585
- return -1 ;
1586
- }
1587
- self -> stream_phase = GzipReader_NULL_BYTES ;
1588
- case GzipReader_NULL_BYTES :
1589
- // There maybe NULL bytes between gzip members
1590
- while (current_pos < buffer_end ) {
1591
- if (* current_pos != 0 ) {
1592
- self -> stream_phase = GzipReader_HEADER ;
1593
- // Use goto to prevent unnecessarily refreshing the buffer;
1594
- goto GzipReader_read_header ;
1589
+ self -> stream_phase = GzipReader_NULL_BYTES ;
1590
+ case GzipReader_NULL_BYTES :
1591
+ // There maybe NULL bytes between gzip members
1592
+ while (current_pos < buffer_end ) {
1593
+ if (* current_pos != 0 ) {
1594
+ self -> stream_phase = GzipReader_HEADER ;
1595
+ break ;
1596
+ }
1597
+ current_pos += 1 ;
1595
1598
}
1596
- current_pos += 1 ;
1597
- }
1598
- if (current_pos >= buffer_end ) {
1599
- break ;
1600
- }
1601
- default :
1602
- Py_UNREACHABLE ();
1599
+ if (current_pos >= buffer_end ) {
1600
+ break ;
1601
+ }
1602
+ // Continue to prevent refreshing the buffer for each block.
1603
+ continue ;
1604
+ default :
1605
+ Py_UNREACHABLE ();
1606
+ }
1607
+ break ;
1603
1608
}
1604
1609
Py_BLOCK_THREADS ;
1605
1610
// If buffer_end is reached, nothing was returned and all bytes are
0 commit comments