Skip to content

Commit 5339064

Browse files
authored
Add Rust implementation of CRC32C (#83)
1 parent 55d1ed0 commit 5339064

File tree

9 files changed

+144
-14
lines changed

9 files changed

+144
-14
lines changed
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
use pyo3::prelude::*;
2+
3+
mod crc32c;
4+
5+
/// Create the `hash` submodule, populate it, and attach it to `m`.
///
/// The new module receives the `crc32c` submodule via
/// `crc32c::create_submodule` before being added to the parent. Any error
/// from module creation or registration is returned to the caller.
pub fn create_submodule(m: &Bound<'_, PyModule>) -> PyResult<()> {
    let py = m.py();
    let hash = PyModule::new(py, "hash")?;

    // Fill the module first, then hang it off the parent.
    crc32c::create_submodule(&hash)?;
    m.add_submodule(&hash)
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
use pyo3::prelude::*;
2+
3+
/// Byte-indexed lookup table (256 `u32` entries) used by `update` to advance
/// the CRC32C state one input byte at a time.
///
/// Entry 0x80 is 0x82f63b78, the bit-reflected Castagnoli polynomial, so the
/// table corresponds to the reflected (least-significant-bit first) form of
/// CRC-32C.
const _TABLE: [u32; 256] = [
4+
0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
5+
0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
6+
0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
7+
0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
8+
0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
9+
0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
10+
0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
11+
0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
12+
0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
13+
0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
14+
0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
15+
0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
16+
0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
17+
0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
18+
0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
19+
0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
20+
0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
21+
0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
22+
0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
23+
0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
24+
0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
25+
0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
26+
0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
27+
0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
28+
0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
29+
0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
30+
0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
31+
0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
32+
0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
33+
0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
34+
0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
35+
0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,
36+
];
37+
38+
/// Update CRC32C checksum with data.
39+
///
40+
/// Args:
41+
/// crc: The initial value of the checksum.
42+
/// data: The data to update the checksum with.
43+
///
44+
/// Returns:
45+
/// The computed CRC32C.
46+
///
47+
#[pyfunction]
48+
#[pyo3(signature = (crc, data))]
49+
fn update(py: Python<'_>, crc: u32, data: Vec<u8>) -> PyResult<PyObject> {
50+
let mut crc = crc ^ 0xFFFFFFFF;
51+
for &b in &data {
52+
crc = _TABLE[((crc ^ b as u32) & 0xFF) as usize] ^ ((crc >> 8) & 0xFFFFFFFF);
53+
}
54+
Ok((crc ^ 0xFFFFFFFF).into_pyobject(py)?.into())
55+
}
56+
57+
/// Calculate CRC32C checksum of some data, with an optional initial value.
58+
///
59+
/// Args:
60+
/// data: The data to calculate the checksum of.
61+
/// value: The initial value of the checksum. Default is 0.
62+
///
63+
/// Returns:
64+
/// The computed CRC32C.
65+
///
66+
#[pyfunction]
67+
#[pyo3(signature = (data, value=0))]
68+
fn crc32c(py: Python<'_>, data: Vec<u8>, value: u32) -> PyResult<PyObject> {
69+
update(py, value, data)
70+
}
71+
72+
/// Build the `crc32c` submodule and attach it to the parent module `m`.
///
/// Registers the `update` and `crc32c` functions on a new module named
/// `crc32c`, then adds that module to `m`.
///
/// # Errors
///
/// Propagates any `PyErr` raised while creating the module, wrapping the
/// functions, or attaching the submodule to `m`.
pub fn create_submodule(m: &Bound<'_, PyModule>) -> PyResult<()> {
    let submodule = PyModule::new(m.py(), "crc32c")?;

    // Bind each function to the module that actually owns it, so its module
    // association points at `crc32c` rather than at the parent module.
    submodule.add_function(wrap_pyfunction!(update, &submodule)?)?;
    submodule.add_function(wrap_pyfunction!(crc32c, &submodule)?)?;

    m.add_submodule(&submodule)
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
use pyo3::prelude::*;
22

33
mod compression;
4+
mod hash;
45

56
#[pymodule(gil_used = false)]
67
fn _native(m: &Bound<'_, PyModule>) -> PyResult<()> {
    // Register each native submodule on the extension module; the first
    // failure is handed back to the caller unchanged.
    compression::create_submodule(m)?;
    hash::create_submodule(m)
}

dissect/util/_native/__init__.pyi

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
from dissect.util._native import compression
1+
from dissect.util._native import compression, hash
22

3-
__all__ = ["compression"]
3+
__all__ = ["compression", "hash"]
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from dissect.util._native.hash import crc32c
2+
3+
__all__ = ["crc32c"]
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Update the CRC32C checksum `crc` with `data` and return the new checksum.
def update(crc: int, data: bytes) -> int: ...
2+
# Calculate the CRC32C checksum of `data`; `value` is the optional initial checksum (default 0).
def crc32c(data: bytes, value: int = 0) -> int: ...

dissect/util/hash/__init__.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from dissect.util.hash import crc32c
2+
3+
crc32c_python = crc32c
4+
5+
# This selects between a native Rust version of crc32c (when available) and our own
6+
# pure-Python implementation.
7+
#
8+
# Doing a:
9+
# from dissect.util.hash import crc32c
10+
#
11+
# in another project will automatically give you one or the other.
12+
#
13+
# The native Rust version is also available as dissect.util.hash.crc32c_native (when available)
14+
# and the pure Python version is always available as dissect.util.hash.crc32c_python.
15+
try:
16+
from dissect.util import _native
17+
18+
crc32c = crc32c_native = _native.hash.crc32c
19+
except (ImportError, AttributeError):
20+
crc32c_native = None
21+
22+
__all__ = [
23+
"crc32c",
24+
"crc32c_native",
25+
"crc32c_python",
26+
"jenkins",
27+
]

tests/conftest.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,24 +24,33 @@ def pytest_addoption(parser: pytest.Parser) -> None:
2424
)
2525

2626

27-
def _native_or_python(name: str, request: pytest.FixtureRequest) -> ModuleType:
28-
from dissect.util import compression
29-
27+
def _native_or_python(module: ModuleType, name: str, request: pytest.FixtureRequest) -> ModuleType:
3028
if request.param:
31-
if not (module := getattr(compression, f"{name}_native", None)):
29+
if not (module := getattr(module, f"{name}_native", None)):
3230
(pytest.fail if request.config.getoption("--force-native") else pytest.skip)(
3331
"_native module is unavailable"
3432
)
3533

3634
return module
37-
return getattr(compression, f"{name}_python", None)
35+
return getattr(module, f"{name}_python", None)
3836

3937

4038
@pytest.fixture(scope="session", params=[True, False], ids=["native", "python"])
4139
def lz4(request: pytest.FixtureRequest) -> ModuleType:
42-
return _native_or_python("lz4", request)
40+
from dissect.util import compression
41+
42+
return _native_or_python(compression, "lz4", request)
4343

4444

4545
@pytest.fixture(scope="session", params=[True, False], ids=["native", "python"])
4646
def lzo(request: pytest.FixtureRequest) -> ModuleType:
47-
return _native_or_python("lzo", request)
47+
from dissect.util import compression
48+
49+
return _native_or_python(compression, "lzo", request)
50+
51+
52+
@pytest.fixture(scope="session", params=[True, False], ids=["native", "python"])
53+
def crc32c(request: pytest.FixtureRequest) -> ModuleType:
54+
from dissect.util import hash
55+
56+
return _native_or_python(hash, "crc32c", request)

tests/test_hash.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44

55
import pytest
66

7-
from dissect.util.hash.crc32c import crc32c
87
from dissect.util.hash.jenkins import lookup8, lookup8_quads
98

109
if TYPE_CHECKING:
10+
from types import ModuleType
11+
1112
from pytest_benchmark.fixture import BenchmarkFixture
1213

1314

@@ -30,13 +31,13 @@
3031
(bytes(reversed(range(32))), 0, 0x113FDB5C),
3132
],
3233
)
33-
def test_crc32c(data: bytes, value: int, expected: int) -> None:
34-
assert crc32c(data, value) == expected
34+
def test_crc32c(crc32c: ModuleType, data: bytes, value: int, expected: int) -> None:
35+
assert crc32c.crc32c(data, value) == expected
3536

3637

3738
@pytest.mark.benchmark
38-
def test_crc32c_benchmark(benchmark: BenchmarkFixture) -> None:
39-
benchmark(crc32c, b"hello, world!", 0)
39+
def test_crc32c_benchmark(crc32c: ModuleType, benchmark: BenchmarkFixture) -> None:
40+
benchmark(crc32c.crc32c, b"hello, world!", 0)
4041

4142

4243
def test_lookup8_remainder() -> None:

0 commit comments

Comments
 (0)