Skip to content

Commit fbb742a

Browse files
authored
Merge pull request #154 from pycompression/cleanup
Minor refactorings on GzipReader
2 parents f26cd85 + e215d5e commit fbb742a

File tree

1 file changed

+90
-75
lines changed

1 file changed

+90
-75
lines changed

src/isal/isal_zlibmodule.c

Lines changed: 90 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,40 @@ isal_zlib_crc32(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
254254
PyBuffer_Release(&data);
255255
return return_value;
256256
}
257+
258+
PyDoc_STRVAR(isal_zlib_crc32_combine__doc__,
259+
"crc32_combine($module, crc1, crc2, crc2_length /)\n"
260+
"--\n"
261+
"\n"
262+
"Combine crc1 and crc2 into a new crc that is accurate for the combined data \n"
263+
"blocks that crc1 and crc2 where calculated from.\n"
264+
"\n"
265+
" crc1\n"
266+
" the first crc32 checksum\n"
267+
" crc2\n"
268+
" the second crc32 checksum\n"
269+
" crc2_length\n"
270+
" the lenght of the data block crc2 was calculated from\n"
271+
);
272+
273+
274+
#define ISAL_ZLIB_CRC32_COMBINE_METHODDEF \
275+
{"crc32_combine", (PyCFunction)(void(*)(void))isal_zlib_crc32_combine, \
276+
METH_VARARGS, isal_zlib_crc32_combine__doc__}
277+
278+
static PyObject *
279+
isal_zlib_crc32_combine(PyObject *module, PyObject *args) {
280+
uint32_t crc1 = 0;
281+
uint32_t crc2 = 0;
282+
Py_ssize_t crc2_length = 0;
283+
static char *format = "IIn:crc32combine";
284+
if (PyArg_ParseTuple(args, format, &crc1, &crc2, &crc2_length) < 0) {
285+
return NULL;
286+
}
287+
return PyLong_FromUnsignedLong(
288+
crc32_comb(crc1, crc2, crc2_length) & 0xFFFFFFFF);
289+
}
290+
257291
PyDoc_STRVAR(zlib_compress__doc__,
258292
"compress($module, data, /, level=ISAL_DEFAULT_COMPRESSION, wbits=MAX_WBITS)\n"
259293
"--\n"
@@ -1188,56 +1222,6 @@ isal_zlib_Decompress_flush(decompobject *self, PyObject *const *args, Py_ssize_t
11881222
return isal_zlib_Decompress_flush_impl(self, length);
11891223
}
11901224

1191-
PyDoc_STRVAR(isal_zlib_crc32_combine__doc__,
1192-
"crc32_combine($module, crc1, crc2, crc2_length /)\n"
1193-
"--\n"
1194-
"\n"
1195-
"Combine crc1 and crc2 into a new crc that is accurate for the combined data \n"
1196-
"blocks that crc1 and crc2 where calculated from.\n"
1197-
"\n"
1198-
" crc1\n"
1199-
" the first crc32 checksum\n"
1200-
" crc2\n"
1201-
" the second crc32 checksum\n"
1202-
" crc2_length\n"
1203-
" the lenght of the data block crc2 was calculated from\n"
1204-
);
1205-
1206-
1207-
#define ISAL_ZLIB_CRC32_COMBINE_METHODDEF \
1208-
{"crc32_combine", (PyCFunction)(void(*)(void))isal_zlib_crc32_combine, \
1209-
METH_VARARGS, isal_zlib_crc32_combine__doc__}
1210-
1211-
static PyObject *
1212-
isal_zlib_crc32_combine(PyObject *module, PyObject *args) {
1213-
uint32_t crc1 = 0;
1214-
uint32_t crc2 = 0;
1215-
Py_ssize_t crc2_length = 0;
1216-
static char *format = "IIn:crc32combine";
1217-
if (PyArg_ParseTuple(args, format, &crc1, &crc2, &crc2_length) < 0) {
1218-
return NULL;
1219-
}
1220-
return PyLong_FromUnsignedLong(
1221-
crc32_comb(crc1, crc2, crc2_length) & 0xFFFFFFFF);
1222-
}
1223-
1224-
1225-
typedef struct {
1226-
PyTypeObject *Comptype;
1227-
PyTypeObject *Decomptype;
1228-
PyObject *IsalError;
1229-
} isal_zlib_state;
1230-
1231-
static PyMethodDef IsalZlibMethods[] = {
1232-
ISAL_ZLIB_ADLER32_METHODDEF,
1233-
ISAL_ZLIB_CRC32_METHODDEF,
1234-
ISAL_ZLIB_CRC32_COMBINE_METHODDEF,
1235-
ISAL_ZLIB_COMPRESS_METHODDEF,
1236-
ISAL_ZLIB_DECOMPRESS_METHODDEF,
1237-
ISAL_ZLIB_COMPRESSOBJ_METHODDEF,
1238-
ISAL_ZLIB_DECOMPRESSOBJ_METHODDEF,
1239-
{NULL, NULL, 0, NULL} /* Sentinel */
1240-
};
12411225

12421226
static PyMethodDef comp_methods[] = {
12431227
ISAL_ZLIB_COMPRESS_COMPRESS_METHODDEF,
@@ -1464,16 +1448,27 @@ GzipReader_read_from_file(GzipReader *self)
14641448

14651449
static PyObject *BadGzipFile; // Import BadGzipFile error for consistency
14661450

1451+
/*
 * Read a 32-bit unsigned integer stored in little-endian byte order.
 *
 * `address` may have any alignment; exactly four bytes are read. The value is
 * assembled byte by byte, so the result is the same on little- and big-endian
 * hosts and no unaligned or type-punned load is performed. Each byte is
 * widened to uint32_t before shifting so that a top byte >= 0x80 is never
 * shifted into the sign bit of an int.
 */
static inline uint32_t load_u32_le(const void *address) {
    const uint8_t *mem = address;
    return (uint32_t)mem[0] |
           ((uint32_t)mem[1] << 8) |
           ((uint32_t)mem[2] << 16) |
           ((uint32_t)mem[3] << 24);
}
1459+
1460+
/*
 * Read a 16-bit unsigned integer stored in little-endian byte order.
 *
 * `address` may have any alignment; exactly two bytes are read (never the
 * bytes that follow the field). The value is assembled byte by byte, so the
 * result is the same on little- and big-endian hosts.
 */
static inline uint16_t load_u16_le(const void *address) {
    const uint8_t *mem = address;
    return (uint16_t)((unsigned)mem[0] | ((unsigned)mem[1] << 8));
}
1468+
14671469
static Py_ssize_t
14681470
GzipReader_read_into_buffer(GzipReader *self, uint8_t *out_buffer, size_t out_buffer_size)
14691471
{
1470-
if (out_buffer_size > UINT32_MAX) {
1471-
PyErr_SetString(
1472-
PyExc_RuntimeError,
1473-
"Internal function GzipReader_read_into_buffer called "
1474-
"with a too large buffer");
1475-
return -1;
1476-
}
14771472
Py_ssize_t bytes_written = 0;
14781473
/* Outer loop is the file read in loop */
14791474
while (1) {
@@ -1517,15 +1512,15 @@ GzipReader_read_into_buffer(GzipReader *self, uint8_t *out_buffer, size_t out_bu
15171512
return -1;
15181513
}
15191514
uint8_t flags = current_pos[3];
1520-
self->_last_mtime = *(uint32_t *)(current_pos + 4);
1515+
self->_last_mtime = load_u32_le(current_pos + 4);
15211516
// Skip XFL and header flag
15221517
uint8_t *header_cursor = current_pos + 10;
15231518
if (flags & FEXTRA) {
15241519
// Read the extra field and discard it.
15251520
if (header_cursor + 2 >= buffer_end) {
15261521
break;
15271522
}
1528-
uint16_t flength = *(uint16_t *)header_cursor;
1523+
uint16_t flength = load_u16_le(header_cursor);
15291524
header_cursor += 2;
15301525
if (header_cursor + flength >= buffer_end) {
15311526
break;
@@ -1552,7 +1547,7 @@ GzipReader_read_into_buffer(GzipReader *self, uint8_t *out_buffer, size_t out_bu
15521547
if (header_cursor + 2 >= buffer_end) {
15531548
break;
15541549
}
1555-
uint16_t header_crc = *(uint16_t *)header_cursor;
1550+
uint16_t header_crc = load_u16_le(header_cursor);
15561551
uint16_t crc = crc32_gzip_refl(
15571552
0, current_pos, header_cursor - current_pos) & 0xFFFF;
15581553
if (header_crc != crc) {
@@ -1572,9 +1567,9 @@ GzipReader_read_into_buffer(GzipReader *self, uint8_t *out_buffer, size_t out_bu
15721567
self->stream_phase = GzipReader_DEFLATE_BLOCK;
15731568
case GzipReader_DEFLATE_BLOCK:
15741569
self->state.next_in = current_pos;
1575-
self->state.avail_in = buffer_end - current_pos;
1570+
self->state.avail_in = Py_MIN((buffer_end -current_pos), UINT32_MAX);
15761571
self->state.next_out = out_buffer;
1577-
self->state.avail_out = out_buffer_size;
1572+
self->state.avail_out = Py_MIN(out_buffer_size, UINT32_MAX);
15781573
int ret;
15791574
ret = isal_inflate(&self->state);
15801575
if (ret != ISAL_DECOMP_OK) {
@@ -1586,11 +1581,16 @@ GzipReader_read_into_buffer(GzipReader *self, uint8_t *out_buffer, size_t out_bu
15861581
bytes_written += current_bytes_written;
15871582
self->_pos += current_bytes_written;
15881583
out_buffer = self->state.next_out;
1589-
out_buffer_size = self->state.avail_out;
1584+
out_buffer_size -= current_bytes_written;
15901585
current_pos = self->state.next_in;
15911586
if (!(self->state.block_state == ISAL_BLOCK_FINISH)) {
1592-
if (self->state.avail_out > 0) {
1593-
break;
1587+
if (out_buffer_size > 0) {
1588+
if (current_pos == buffer_end) {
1589+
// Need fresh bytes
1590+
break;
1591+
}
1592+
// Not all input data decompressed.
1593+
continue;
15941594
}
15951595
self->current_pos = current_pos;
15961596
Py_BLOCK_THREADS;
@@ -1603,7 +1603,7 @@ GzipReader_read_into_buffer(GzipReader *self, uint8_t *out_buffer, size_t out_bu
16031603
if (buffer_end - current_pos < 8) {
16041604
break;
16051605
}
1606-
uint32_t crc = *(uint32_t *)current_pos;
1606+
uint32_t crc = load_u32_le(current_pos);
16071607
current_pos += 4;
16081608
if (crc != self->state.crc) {
16091609
Py_BLOCK_THREADS;
@@ -1614,7 +1614,7 @@ GzipReader_read_into_buffer(GzipReader *self, uint8_t *out_buffer, size_t out_bu
16141614
);
16151615
return -1;
16161616
}
1617-
uint32_t length = *(uint32_t *)current_pos;
1617+
uint32_t length = load_u32_le(current_pos);
16181618
current_pos += 4;
16191619
if (length != self->state.total_out) {
16201620
Py_BLOCK_THREADS;
@@ -1624,17 +1624,14 @@ GzipReader_read_into_buffer(GzipReader *self, uint8_t *out_buffer, size_t out_bu
16241624
self->stream_phase = GzipReader_NULL_BYTES;
16251625
case GzipReader_NULL_BYTES:
16261626
// There maybe NULL bytes between gzip members
1627-
while (current_pos < buffer_end) {
1628-
if (*current_pos != 0) {
1629-
self->stream_phase = GzipReader_HEADER;
1630-
break;
1631-
}
1627+
while (current_pos < buffer_end && *current_pos == 0) {
16321628
current_pos += 1;
16331629
}
1634-
if (current_pos >= buffer_end) {
1630+
if (current_pos == buffer_end) {
1631+
/* Not all NULL bytes may have been read, refresh the buffer.*/
16351632
break;
16361633
}
1637-
// Continue to prevent refreshing the buffer for each block.
1634+
self->stream_phase = GzipReader_HEADER;
16381635
continue;
16391636
default:
16401637
Py_UNREACHABLE();
@@ -1671,7 +1668,7 @@ GzipReader_readinto(GzipReader *self, PyObject *buffer_obj)
16711668
return NULL;
16721669
}
16731670
uint8_t *buffer = view.buf;
1674-
size_t buffer_size = Py_MIN(view.len, UINT32_MAX);
1671+
size_t buffer_size = view.len;
16751672
ENTER_ZLIB(self);
16761673
Py_ssize_t written_size = GzipReader_read_into_buffer(self, buffer, buffer_size);
16771674
LEAVE_ZLIB(self);
@@ -1944,6 +1941,7 @@ static PyTypeObject GzipReader_Type = {
19441941
.tp_flags = Py_TPFLAGS_DEFAULT,
19451942
.tp_dealloc = (destructor)GzipReader_dealloc,
19461943
.tp_new = (newfunc)(GzipReader__new__),
1944+
.tp_doc = GzipReader__new____doc__,
19471945
.tp_methods = GzipReader_methods,
19481946
.tp_getset = GzipReader_properties,
19491947
};
@@ -1964,6 +1962,23 @@ PyDoc_STRVAR(isal_zlib_module_documentation,
19641962
"Compressor objects support compress() and flush() methods; decompressor\n"
19651963
"objects support decompress() and flush().");
19661964

1965+
typedef struct {
1966+
PyTypeObject *Comptype;
1967+
PyTypeObject *Decomptype;
1968+
PyObject *IsalError;
1969+
} isal_zlib_state;
1970+
1971+
static PyMethodDef IsalZlibMethods[] = {
1972+
ISAL_ZLIB_ADLER32_METHODDEF,
1973+
ISAL_ZLIB_CRC32_METHODDEF,
1974+
ISAL_ZLIB_CRC32_COMBINE_METHODDEF,
1975+
ISAL_ZLIB_COMPRESS_METHODDEF,
1976+
ISAL_ZLIB_DECOMPRESS_METHODDEF,
1977+
ISAL_ZLIB_COMPRESSOBJ_METHODDEF,
1978+
ISAL_ZLIB_DECOMPRESSOBJ_METHODDEF,
1979+
{NULL, NULL, 0, NULL} /* Sentinel */
1980+
};
1981+
19671982
static struct PyModuleDef isal_zlib_module = {
19681983
PyModuleDef_HEAD_INIT,
19691984
"isal_zlib", /* name of module */

0 commit comments

Comments
 (0)