Skip to content

Commit f71608e

Browse files
committed
expose zlib.{adler32,crc32}_combine
1 parent 2fd09b0 commit f71608e

File tree

3 files changed

+297
-1
lines changed

3 files changed

+297
-1
lines changed

Lib/test/test_zlib.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,104 @@ def test_same_as_binascii_crc32(self):
119119
self.assertEqual(binascii.crc32(b'spam'), zlib.crc32(b'spam'))
120120

121121

122+
class ChecksumCombineMixin:
    """Shared tests for functions that merge two checksums into one.

    Concrete test cases set ``default_iv`` and implement ``_checksum()``
    and ``combine()`` for the checksum algorithm under test.
    """

    # Number of random trials performed by each test method.
    N = 1000
    # Initial value substituted when a caller passes ``None`` as the IV.
    default_iv: int

    def parse_iv(self, iv):
        """Resolve *iv* to a concrete initial value.

        ``None`` selects ``default_iv``, ``-1`` draws a random integer in
        ``[1, 0x80000000]``, and any other value is passed through unchanged.
        """
        if iv is None:
            resolved = self.default_iv
        elif iv == -1:
            resolved = random.randint(1, 0x80000000)
        else:
            resolved = iv
        return resolved

    def checksum(self, data, init=None):
        """Return the checksum of *data* started from *init*.

        *init* is first resolved through ``parse_iv()``.
        """
        return self._checksum(data, self.parse_iv(init))

    def _checksum(self, data, init):
        """Algorithm hook: checksum of *data* with initial value *init*."""
        raise NotImplementedError

    def combine(self, a, b, blen):
        """Algorithm hook: merge checksums *a* and *b*; *b* covers *blen* bytes."""
        raise NotImplementedError

    def get_random_data(self, data_len, *, iv=None):
        """Return ``(data, iv, checksum)`` for *data_len* random bytes.

        *iv* is resolved through ``parse_iv()``; the resolved value is both
        returned and used to compute the checksum.
        """
        payload = random.randbytes(data_len)
        seed = self.parse_iv(iv)
        return payload, seed, self.checksum(payload, seed)

    def test_combine_empty(self):
        # Combining a bare IV with the default-IV checksum of the data must
        # give the checksum of the data computed from that IV.
        for _ in range(self.N):
            payload, seed, seeded_chk = self.get_random_data(32, iv=-1)
            merged = self.combine(seed, self.checksum(payload), len(payload))
            self.assertEqual(merged, seeded_chk)

    def test_combine_no_iv(self):
        # With default IVs on both sides, combining the two checksums must
        # equal the checksum of the concatenated data.
        for _ in range(self.N):
            head, _, head_chk = self.get_random_data(32)
            tail, _, tail_chk = self.get_random_data(64)
            merged = self.combine(head_chk, tail_chk, len(tail))
            self.assertEqual(merged, self.checksum(head + tail))

    def test_combine_with_iv(self):
        for _ in range(self.N):
            head, head_iv, head_seeded = self.get_random_data(32, iv=-1)
            head_plain = self.checksum(head)
            tail, tail_iv, tail_seeded = self.get_random_data(64, iv=-1)
            tail_plain = self.checksum(tail)

            # A checksum computed from a custom IV can be rebuilt as
            # COMBINE(iv, CHK(data), len(data)), which is what
            # test_combine_empty verifies.  Therefore
            #
            #   COMBINE(CHK(head, head_iv), CHK(tail, tail_iv))
            #
            # must agree with
            #
            #   COMBINE(COMBINE(head_iv, CHK(head)),
            #           COMBINE(tail_iv, CHK(tail)))
            #
            # where the outer COMBINE uses len(tail) in both cases.
            rebuilt_head = self.combine(head_iv, head_plain, len(head))
            rebuilt_tail = self.combine(tail_iv, tail_plain, len(tail))
            expected = self.combine(rebuilt_head, rebuilt_tail, len(tail))
            actual = self.combine(head_seeded, tail_seeded, len(tail))
            self.assertEqual(actual, expected)
196+
197+
198+
class CRC32CombineTestCase(ChecksumCombineMixin, unittest.TestCase):
    """Run the checksum-combination tests against zlib's CRC-32 functions."""

    # Initial value the mixin substitutes when a test supplies no IV.
    default_iv = 0

    def combine(self, a, b, blen):
        """Merge CRC-32 values *a* and *b* via ``zlib.crc32_combine``."""
        return zlib.crc32_combine(a, b, blen)

    def _checksum(self, data, init):
        """Return ``zlib.crc32`` of *data* started from *init*."""
        return zlib.crc32(data, init)
207+
208+
209+
class Adler32CombineTestCase(ChecksumCombineMixin, unittest.TestCase):
    """Run the checksum-combination tests against zlib's Adler-32 functions."""

    # Initial value the mixin substitutes when a test supplies no IV.
    default_iv = 1

    def combine(self, a, b, blen):
        """Merge Adler-32 values *a* and *b* via ``zlib.adler32_combine``."""
        return zlib.adler32_combine(a, b, blen)

    def _checksum(self, data, init):
        """Return ``zlib.adler32`` of *data* started from *init*."""
        return zlib.adler32(data, init)
218+
219+
122220
# Issue #10276 - check that inputs >=4 GiB are handled correctly.
123221
class ChecksumBigBufferTestCase(unittest.TestCase):
124222

Modules/clinic/zlibmodule.c.h

Lines changed: 111 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Modules/zlibmodule.c

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,16 @@
1717
#error "At least zlib version 1.2.2.1 is required"
1818
#endif
1919

20+
/* Pick the PyLong conversion function that convert_to_z_off_t expands to by
   comparing the size of off_t with size_t, long long and long (in that
   order).  The macro is used to convert the Python-level length argument of
   the *_combine() functions.  Compilation stops with an error when off_t
   matches none of those sizes. */
#if (SIZEOF_OFF_T == SIZEOF_SIZE_T)
21+
# define convert_to_z_off_t PyLong_AsSsize_t
22+
#elif (SIZEOF_OFF_T == SIZEOF_LONG_LONG)
23+
# define convert_to_z_off_t PyLong_AsLongLong
24+
#elif (SIZEOF_OFF_T == SIZEOF_LONG)
25+
# define convert_to_z_off_t PyLong_AsLong
26+
#else
27+
# error off_t does not match either size_t, long, or long long!
28+
#endif
29+
2030
// Blocks output buffer wrappers
2131
#include "pycore_blocks_output_buffer.h"
2232

@@ -1876,6 +1886,44 @@ zlib_adler32_impl(PyObject *module, Py_buffer *data, unsigned int value)
18761886
return PyLong_FromUnsignedLong(value & 0xffffffffU);
18771887
}
18781888

1889+
/*[clinic input]
1890+
zlib.adler32_combine -> unsigned_int
1891+
1892+
adler1: unsigned_int(bitwise=True)
1893+
Adler-32 check value for sequence A
1894+
1895+
adler2: unsigned_int(bitwise=True)
1896+
Adler-32 check value for sequence B
1897+
1898+
blen: object
1899+
Length of sequence B
1900+
/
1901+
1902+
Combine two Adler-32 check values into one.
1903+
1904+
Given an Adler-32 check value 'adler1' of a sequence A and an Adler-32 check
1905+
value 'adler2' of a sequence B of length 'blen', the returned checksum
1906+
is the Adler-32 check value of A and B concatenated.
1907+
[clinic start generated code]*/
1908+
1909+
static unsigned int
1910+
zlib_adler32_combine_impl(PyObject *module, unsigned int adler1,
1911+
unsigned int adler2, PyObject *blen)
1912+
/*[clinic end generated code: output=57aee1d70f5e2908 input=29005ae6aaa024b3]*/
1913+
{
1914+
#if defined(Z_WANT64)
1915+
z_off64_t len = convert_to_z_off_t(blen);
1916+
#else
1917+
z_off_t len = convert_to_z_off_t(blen);
1918+
#endif
1919+
if (PyErr_Occurred()) {
1920+
return (unsigned int)-1;
1921+
}
1922+
return adler32_combine(adler1, adler2, len);
1923+
}
1924+
1925+
1926+
18791927
/*[clinic input]
18801928
zlib.crc32 -> unsigned_int
18811929
@@ -1923,13 +1971,50 @@ zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value)
19231971
return value;
19241972
}
19251973

1974+
/*[clinic input]
1975+
zlib.crc32_combine -> unsigned_int
1976+
1977+
crc1: unsigned_int(bitwise=True)
1978+
CRC-32 check value for sequence A
1979+
1980+
crc2: unsigned_int(bitwise=True)
1981+
CRC-32 check value for sequence B
1982+
1983+
blen: object
1984+
Length of sequence B
1985+
/
1986+
1987+
Combine two CRC-32 check values into one.
1988+
1989+
Given a CRC-32 check value 'crc1' of a sequence A and a CRC-32 check
1990+
value 'crc2' of a sequence B of length 'blen', the returned checksum
1991+
is the CRC-32 check value of A and B concatenated.
1992+
[clinic start generated code]*/
1993+
1994+
static unsigned int
1995+
zlib_crc32_combine_impl(PyObject *module, unsigned int crc1,
1996+
unsigned int crc2, PyObject *blen)
1997+
/*[clinic end generated code: output=dece978b27e8eada input=4d394ee4d80aa35a]*/
1998+
{
1999+
#if defined(Z_WANT64)
2000+
z_off64_t len = convert_to_z_off_t(blen);
2001+
#else
2002+
z_off_t len = convert_to_z_off_t(blen);
2003+
#endif
2004+
if (PyErr_Occurred()) {
2005+
return (unsigned int)-1;
2006+
}
2007+
return crc32_combine(crc1, crc2, len);
2008+
}
19262009

19272010
static PyMethodDef zlib_methods[] =
19282011
{
19292012
ZLIB_ADLER32_METHODDEF
2013+
ZLIB_ADLER32_COMBINE_METHODDEF
19302014
ZLIB_COMPRESS_METHODDEF
19312015
ZLIB_COMPRESSOBJ_METHODDEF
19322016
ZLIB_CRC32_METHODDEF
2017+
ZLIB_CRC32_COMBINE_METHODDEF
19332018
ZLIB_DECOMPRESS_METHODDEF
19342019
ZLIB_DECOMPRESSOBJ_METHODDEF
19352020
{NULL, NULL}
@@ -1981,14 +2066,17 @@ static PyType_Spec ZlibDecompressor_type_spec = {
19812066
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
19822067
.slots = ZlibDecompressor_type_slots,
19832068
};
2069+
19842070
PyDoc_STRVAR(zlib_module_documentation,
19852071
"The functions in this module allow compression and decompression using the\n"
19862072
"zlib library, which is based on GNU zip.\n"
19872073
"\n"
19882074
"adler32(string[, start]) -- Compute an Adler-32 checksum.\n"
2075+
"adler32_combine(adler1, adler2, len2) -- Combine two Adler-32 checksums.\n"
19892076
"compress(data[, level]) -- Compress data, with compression level 0-9 or -1.\n"
19902077
"compressobj([level[, ...]]) -- Return a compressor object.\n"
19912078
"crc32(string[, start]) -- Compute a CRC-32 checksum.\n"
2079+
"crc32_combine(crc1, crc2, len2) -- Combine two CRC-32 checksums.\n"
19922080
"decompress(string,[wbits],[bufsize]) -- Decompresses a compressed string.\n"
19932081
"decompressobj([wbits[, zdict]]) -- Return a decompressor object.\n"
19942082
"\n"

0 commit comments

Comments
 (0)