diff --git a/Doc/library/lzma.rst b/Doc/library/lzma.rst index 69f7cb8d48d7ae..e91034d23627dd 100644 --- a/Doc/library/lzma.rst +++ b/Doc/library/lzma.rst @@ -311,6 +311,23 @@ Compressing and decompressing data in memory *preset* and *filters* arguments. +.. function:: crc32(data, value=0, /) + + .. index:: + single: Cyclic Redundancy Check + single: checksum; Cyclic Redundancy Check + + Computes a CRC (Cyclic Redundancy Check) checksum of *data*. The + result is a non-negative integer, less than :math:`2^{32}`. If *value* is present, it is used + as the starting value of the checksum; otherwise, a default value of 0 + is used. Passing in *value* allows computing a running checksum over the + concatenation of several inputs. The algorithm is not cryptographically + strong, and should not be used for authentication or digital signatures. Since + the algorithm is designed for use as a checksum algorithm, it is not suitable + for use as a general hash algorithm. + + .. versionadded:: next + .. function:: decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None) Decompress *data* (a :class:`bytes` object), returning the uncompressed data diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py index e93c3c37354e27..3a02362258965f 100644 --- a/Lib/test/test_lzma.py +++ b/Lib/test/test_lzma.py @@ -1,4 +1,5 @@ import array +import binascii from io import BytesIO, UnsupportedOperation, DEFAULT_BUFFER_SIZE import os import pickle @@ -8,7 +9,7 @@ import unittest from compression._common import _streams -from test.support import _4G, bigmemtest +from test.support import _1G, _4G, bigmemtest from test.support.import_helper import import_module from test.support.os_helper import ( TESTFN, unlink, FakePath @@ -17,6 +18,44 @@ lzma = import_module("lzma") from lzma import LZMACompressor, LZMADecompressor, LZMAError, LZMAFile +class ChecksumTestCase(unittest.TestCase): + # checksum test cases + def test_crc32start(self): + self.assertEqual(lzma.crc32(b""), lzma.crc32(b"", 0)) + 
self.assertTrue(lzma.crc32(b"abc", 0xffffffff)) + + def test_crc32empty(self): + self.assertEqual(lzma.crc32(b"", 0), 0) + self.assertEqual(lzma.crc32(b"", 1), 1) + self.assertEqual(lzma.crc32(b"", 432), 432) + + def test_penguins(self): + self.assertEqual(lzma.crc32(b"penguin", 0), 0x0e5c1a120) + self.assertEqual(lzma.crc32(b"penguin", 1), 0x43b6aa94) + self.assertEqual(lzma.crc32(b"penguin"), lzma.crc32(b"penguin", 0)) + + def test_crc32_unsigned(self): + foo = b'abcdefghijklmnop' + # results >= 2**31 must be returned as unsigned (non-negative) integers + self.assertEqual(lzma.crc32(foo), 2486878355) + self.assertEqual(lzma.crc32(b'spam'), 1138425661) + + def test_same_as_binascii_crc32(self): + foo = b'abcdefghijklmnop' + crc = 2486878355 + self.assertEqual(binascii.crc32(foo), crc) + self.assertEqual(lzma.crc32(foo), crc) + self.assertEqual(binascii.crc32(b'spam'), lzma.crc32(b'spam')) + + +# GH-54485 - check that inputs >=4 GiB are handled correctly. +class ChecksumBigBufferTestCase(unittest.TestCase): + + @bigmemtest(size=_4G + 4, memuse=1, dry_run=False) + def test_big_buffer(self, size): + data = b"nyan" * (_1G + 1) + self.assertEqual(lzma.crc32(data), 1044521549) + class CompressorDecompressorTestCase(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2025-03-18-14-18-06.gh-issue-91349.Qrnmxt.rst b/Misc/NEWS.d/next/Library/2025-03-18-14-18-06.gh-issue-91349.Qrnmxt.rst new file mode 100644 index 00000000000000..855da9d2673ca7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-03-18-14-18-06.gh-issue-91349.Qrnmxt.rst @@ -0,0 +1 @@ +Expose the crc32 function from the lzma library as :func:`lzma.crc32`. diff --git a/Modules/_lzmamodule.c b/Modules/_lzmamodule.c index 462c2181fa6036..8b14472f3c9611 100644 --- a/Modules/_lzmamodule.c +++ b/Modules/_lzmamodule.c @@ -1602,10 +1602,40 @@ lzma_exec(PyObject *module) return 0; } +/*[clinic input] +_lzma.crc32 -> unsigned_int + + data: Py_buffer + value: unsigned_int(bitwise=True) = 0 + Starting value of the checksum. 
+ / + +Compute a CRC-32 checksum of data. + +The returned checksum is an integer. +[clinic start generated code]*/ + +static unsigned int +_lzma_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value) +/*[clinic end generated code: output=fca7916d796faf8b input=bb623a169c14534f]*/ +{ + /* Releasing the GIL for very small buffers is inefficient and may lower + performance, so it is only released for buffers larger than 1024*5 bytes */ + if (data->len > 1024*5) { + Py_BEGIN_ALLOW_THREADS + value = lzma_crc32(data->buf, (size_t)data->len, (uint32_t)value); + Py_END_ALLOW_THREADS + } else { + value = lzma_crc32(data->buf, (size_t)data->len, (uint32_t)value); + } + return value; +} + static PyMethodDef lzma_methods[] = { _LZMA_IS_CHECK_SUPPORTED_METHODDEF _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF _LZMA__DECODE_FILTER_PROPERTIES_METHODDEF + _LZMA_CRC32_METHODDEF {NULL} }; diff --git a/Modules/clinic/_lzmamodule.c.h b/Modules/clinic/_lzmamodule.c.h index ebdc81a0dac2f0..abad1082690785 100644 --- a/Modules/clinic/_lzmamodule.c.h +++ b/Modules/clinic/_lzmamodule.c.h @@ -333,4 +333,58 @@ _lzma__decode_filter_properties(PyObject *module, PyObject *const *args, Py_ssiz return return_value; } -/*[clinic end generated code: output=6386084cb43d2533 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(_lzma_crc32__doc__, +"crc32($module, data, value=0, /)\n" +"--\n" +"\n" +"Compute a CRC-32 checksum of data.\n" +"\n" +" value\n" +" Starting value of the checksum.\n" +"\n" +"The returned checksum is an integer."); + +#define _LZMA_CRC32_METHODDEF \ + {"crc32", _PyCFunction_CAST(_lzma_crc32), METH_FASTCALL, _lzma_crc32__doc__}, + +static unsigned int +_lzma_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value); + +static PyObject * +_lzma_crc32(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + Py_buffer data = {NULL, NULL}; + unsigned int value = 0; + unsigned int _return_value; + + if (!_PyArg_CheckPositional("crc32", nargs, 1, 2)) { + goto exit; + } + if (PyObject_GetBuffer(args[0], 
&data, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (nargs < 2) { + goto skip_optional; + } + value = (unsigned int)PyLong_AsUnsignedLongMask(args[1]); + if (value == (unsigned int)-1 && PyErr_Occurred()) { + goto exit; + } +skip_optional: + _return_value = _lzma_crc32_impl(module, &data, value); + if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromUnsignedLong((unsigned long)_return_value); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} +/*[clinic end generated code: output=b6591cb074aa87b6 input=a9049054013a1b77]*/