Skip to content

Commit 30ee557

Browse files
committed
Merge branch 'enable-vpclmulqdq-on-rust-1.89'
2 parents fd20852 + f7ac239 commit 30ee557

File tree

9 files changed

+56
-67
lines changed

9 files changed

+56
-67
lines changed

.github/workflows/tests.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ jobs:
1313
os: [ubuntu-latest, ubuntu-22.04-arm, ubuntu-24.04-arm, macos-latest]
1414
rust-toolchain:
1515
- "1.81" # minimum for this crate
16+
- "1.89" # when VPCLMULQDQ was stabilized
1617
- "stable"
1718
- "nightly"
1819
runs-on: ${{ matrix.os }}
@@ -44,6 +45,7 @@ jobs:
4445
target: [i586-unknown-linux-gnu, i686-unknown-linux-gnu]
4546
rust-toolchain:
4647
- "1.81" # minimum for this crate
48+
- "1.89" # when VPCLMULQDQ was stabilized
4749
- "stable"
4850
- "nightly"
4951
steps:
@@ -69,6 +71,7 @@ jobs:
6971
target: [powerpc-unknown-linux-gnu, powerpc64-unknown-linux-gnu]
7072
rust-toolchain:
7173
- "1.81" # minimum for this crate
74+
- "1.89" # when VPCLMULQDQ was stabilized
7275
- "stable"
7376
- "nightly"
7477
steps:

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Changes for crc-fast-rust
22

33
## [1.4.0](https://github.com/awesomized/crc-fast-rust/releases/tag/1.4.0) - 2025-08-08
4+
* [Enable VPCLMULQDQ support on Rust 1.89+](https://github.com/awesomized/crc-fast-rust/pull/10)
45
* [Support custom CRC parameters](https://github.com/awesomized/crc-fast-rust/pull/11)
56
* [Add checksum command-line utility](https://github.com/awesomized/crc-fast-rust/pull/12)
67
* [Remove bindgen](https://github.com/awesomized/crc-fast-rust/pull/13)

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ license = "MIT OR Apache-2.0"
77
keywords = ["crc", "checksum", "simd", "accelerated", "fast"]
88
categories = ["algorithms", "encoding", "hardware-support"]
99
repository = "https://github.com/awesomized/crc-fast-rust"
10-
description = "Fast, hardware-accelerated CRC-32 and CRC-64 checksum calculation using SIMD"
10+
description = "World's fastest generic CRC32 and CRC64 calculator using SIMD. Supplies a C-compatible shared library for use in other languages."
1111
readme = "README.md"
1212

1313
# 1.69.0 added VPCLMULQDQ x86 detection support, 1.70.0 added LLVM 16 which supports PMULL2 on AArch64
@@ -24,6 +24,7 @@ digest = { version = "0.10", features = ["alloc"] }
2424
rand = "0.9"
2525
libc = "0.2"
2626
regex = "1.11"
27+
rustversion = "1.0"
2728

2829
[dev-dependencies]
2930
criterion = "0.7"
@@ -43,10 +44,8 @@ harness = false
4344
[features]
4445
alloc = []
4546

46-
# enable experimental VPCLMULQDQ support, which landed in Rust 1.89.0-nightly, will deprecate after 1.89.0 is stable
47-
vpclmulqdq = []
48-
49-
# the features below aren't in use, are deprecated, and will be removed in the next MAJOR version
47+
# the features below are deprecated, aren't in use, and will be removed in the next MAJOR version (v2)
48+
vpclmulqdq = [] # deprecated, VPCLMULQDQ stabilized in Rust 1.89.0
5049
optimize_crc32_auto = [] # deprecated
5150
optimize_crc32_neon_eor3_v9s3x2e_s3 = [] # deprecated
5251
optimize_crc32_neon_v12e_v1 = [] # deprecated

README.md

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,22 @@
55
[![Latest Version](https://img.shields.io/crates/v/crc-fast.svg)](https://crates.io/crates/crc-fast)
66
[![Documentation](https://img.shields.io/badge/api-rustdoc-blue.svg)](https://docs.rs/crc-fast)
77

8-
Fast, hardware-accelerated CRC calculation for
9-
[all known CRC-32 and CRC-64 variants](https://reveng.sourceforge.io/crc-catalogue/all.htm) using SIMD intrinsics,
8+
World's fastest generic CRC calculator for
9+
[all known CRC-32 and CRC-64 variants](https://reveng.sourceforge.io/crc-catalogue/all.htm), as well as bring-your-own
10+
custom parameters, using SIMD intrinsics,
1011
which can exceed [100GiB/s](#performance) on modern systems.
1112

12-
Supports acceleration on `aarch64`, `x86_64`, and `x86` architectures, plus has a safe non-accelerated table-based
13+
Supports acceleration on `aarch64`, `x86_64`, and `x86` architectures, plus has a safe non-accelerated table-based
1314
software fallback for others.
1415

1516
The [crc crate](https://crates.io/crates/crc) is ~0.5GiB/s by default, so this is
16-
[up to >220X faster](#tldr-just-tell-me-how-to-turn-it-up-to-11-), and even the most conservative baseline settings
17-
are >27X.
17+
[up to >220X faster](#tldr-just-tell-me-how-to-turn-it-up-to-11-).
1818

1919
This is unique, not just because of the performance, but also because I couldn't find a single generic SIMD-accelerated
2020
implementation (in any language) which worked for _all_ known variants, using the
2121
[Rocksoft model](http://www.ross.net/crc/download/crc_v3.txt), especially the "non-reflected" variants.
2222

23-
So I wrote one.
23+
So I wrote one. :)
2424

2525
## Other languages
2626

@@ -144,7 +144,8 @@ assert_eq!(checksum.unwrap(), 0xcbf43926);
144144

145145
## Custom CRC Parameters
146146

147-
For cases where you need to use CRC variants not included in the predefined algorithms, you can define custom CRC parameters and use the `*_with_params` functions.
147+
For cases where you need to use CRC variants not included in the predefined algorithms, you can define custom CRC
148+
parameters and use the `*_with_params` functions.
148149

149150
### Digest with custom parameters
150151

src/arch/mod.rs

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ use aarch64::AArch64Ops;
2222
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2323
use x86::X86Ops;
2424

25-
//#[rustversion::since(1.89)]
26-
#[cfg(all(target_arch = "x86_64", feature = "vpclmulqdq"))]
25+
#[rustversion::since(1.89)]
26+
#[cfg(target_arch = "x86_64")]
2727
use vpclmulqdq::Vpclmulqdq512Ops;
2828

2929
mod aarch64;
@@ -49,28 +49,25 @@ pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64
4949
}
5050
}
5151

52-
//#[rustversion::before(1.89)]
52+
#[rustversion::before(1.89)]
5353
#[inline]
54-
#[cfg(all(
55-
not(feature = "vpclmulqdq"),
56-
any(target_arch = "x86", target_arch = "x86_64")
57-
))]
54+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
5855
#[target_feature(enable = "ssse3,sse4.1,pclmulqdq")]
5956
pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64 {
6057
update_x86_sse(state, bytes, params)
6158
}
6259

63-
//#[rustversion::since(1.89)]
60+
#[rustversion::since(1.89)]
6461
#[inline]
65-
#[cfg(all(feature = "vpclmulqdq", target_arch = "x86"))]
62+
#[cfg(target_arch = "x86")]
6663
#[target_feature(enable = "ssse3,sse4.1,pclmulqdq")]
6764
pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64 {
6865
update_x86_sse(state, bytes, params)
6966
}
7067

71-
//#[rustversion::since(1.89)]
68+
#[rustversion::since(1.89)]
7269
#[inline]
73-
#[cfg(all(feature = "vpclmulqdq", target_arch = "x86_64"))]
70+
#[cfg(target_arch = "x86_64")]
7471
#[target_feature(enable = "ssse3,sse4.1,pclmulqdq")]
7572
pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64 {
7673
use std::arch::is_x86_feature_detected;
@@ -117,8 +114,7 @@ unsafe fn update_x86_sse(state: u64, bytes: &[u8], params: CrcParams) -> u64 {
117114
}
118115
}
119116

120-
//#[rustversion::before(1.89)]
121-
#[cfg(not(feature = "vpclmulqdq"))]
117+
#[rustversion::before(1.89)]
122118
pub fn get_target() -> String {
123119
#[cfg(target_arch = "aarch64")]
124120
{
@@ -137,8 +133,7 @@ pub fn get_target() -> String {
137133
return "software-fallback-tables".to_string();
138134
}
139135

140-
//#[rustversion::since(1.89)]
141-
#[cfg(feature = "vpclmulqdq")]
136+
#[rustversion::since(1.89)]
142137
pub fn get_target() -> String {
143138
#[cfg(target_arch = "aarch64")]
144139
{

src/arch/vpclmulqdq.rs

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,33 +4,33 @@
44
//!
55
//! It performs folding using 4 x ZMM registers of 512-bits each.
66
7-
#![cfg(all(target_arch = "x86_64", feature = "vpclmulqdq"))]
7+
#![cfg(target_arch = "x86_64")]
88

9-
//#[rustversion::since(1.89)]
9+
#[rustversion::since(1.89)]
1010
use crate::arch::x86::X86Ops;
1111

12-
//#[rustversion::since(1.89)]
12+
#[rustversion::since(1.89)]
1313
use crate::enums::Reflector;
1414

15-
//#[rustversion::since(1.89)]
15+
#[rustversion::since(1.89)]
1616
use crate::structs::CrcState;
1717

18-
//#[rustversion::since(1.89)]
18+
#[rustversion::since(1.89)]
1919
use crate::traits::{ArchOps, EnhancedCrcWidth};
2020

21-
//#[rustversion::since(1.89)]
21+
#[rustversion::since(1.89)]
2222
use std::arch::x86_64::*;
2323

24-
//#[rustversion::since(1.89)]
24+
#[rustversion::since(1.89)]
2525
use std::ops::BitXor;
2626

2727
/// Implements the ArchOps trait using AVX-512 and VPCLMULQDQ instructions operating on 512-bit registers.
2828
/// Delegates to X86Ops for standard 128-bit operations
29-
//#[rustversion::since(1.89)]
29+
#[rustversion::since(1.89)]
3030
#[derive(Debug, Copy, Clone)]
3131
pub struct Vpclmulqdq512Ops(X86Ops);
3232

33-
//#[rustversion::since(1.89)]
33+
#[rustversion::since(1.89)]
3434
impl Vpclmulqdq512Ops {
3535
#[inline(always)]
3636
pub fn new() -> Self {
@@ -39,11 +39,11 @@ impl Vpclmulqdq512Ops {
3939
}
4040

4141
// Wrapper for __m512i to make it easier to work with
42-
//#[rustversion::since(1.89)]
42+
#[rustversion::since(1.89)]
4343
#[derive(Debug, Copy, Clone)]
4444
struct Simd512(__m512i);
4545

46-
//#[rustversion::since(1.89)]
46+
#[rustversion::since(1.89)]
4747
impl Simd512 {
4848
#[inline]
4949
#[target_feature(enable = "avx512f")]
@@ -112,7 +112,7 @@ impl Simd512 {
112112
}
113113
}
114114

115-
//#[rustversion::since(1.89)]
115+
#[rustversion::since(1.89)]
116116
impl Vpclmulqdq512Ops {
117117
/// Process aligned blocks using VPCLMULQDQ with 4 x 512-bit registers
118118
///
@@ -339,15 +339,15 @@ impl Vpclmulqdq512Ops {
339339
}
340340

341341
// 512-bit version of the Reflector
342-
//#[rustversion::since(1.89)]
342+
#[rustversion::since(1.89)]
343343
#[derive(Clone, Copy)]
344344
enum Reflector512 {
345345
NoReflector,
346346
ForwardReflector { smask: Simd512 },
347347
}
348348

349349
// Function to create the appropriate reflector based on CRC parameters
350-
//#[rustversion::since(1.89)]
350+
#[rustversion::since(1.89)]
351351
#[inline(always)]
352352
unsafe fn create_reflector512(reflected: bool) -> Reflector512 {
353353
if reflected {
@@ -369,7 +369,7 @@ unsafe fn create_reflector512(reflected: bool) -> Reflector512 {
369369
}
370370

371371
// Function to apply reflection to a 512-bit vector
372-
//#[rustversion::since(1.89)]
372+
#[rustversion::since(1.89)]
373373
#[inline(always)]
374374
unsafe fn reflect_bytes512(reflector: &Reflector512, data: Simd512) -> Simd512 {
375375
match reflector {
@@ -379,12 +379,12 @@ unsafe fn reflect_bytes512(reflector: &Reflector512, data: Simd512) -> Simd512 {
379379
}
380380

381381
// pre-compute the reverse indices for 512-bit shuffling
382-
//#[rustversion::since(1.89)]
382+
#[rustversion::since(1.89)]
383383
static REVERSE_INDICES_512: __m512i =
384384
unsafe { std::mem::transmute([7u64, 6u64, 5u64, 4u64, 3u64, 2u64, 1u64, 0u64]) };
385385

386386
// Implement a 512-bit byte shuffle function
387-
//#[rustversion::since(1.89)]
387+
#[rustversion::since(1.89)]
388388
#[inline]
389389
#[target_feature(enable = "avx512f,avx512bw")]
390390
unsafe fn shuffle_bytes512(data: Simd512, mask: Simd512) -> Simd512 {
@@ -396,7 +396,7 @@ unsafe fn shuffle_bytes512(data: Simd512, mask: Simd512) -> Simd512 {
396396
}
397397

398398
// Delegate all ArchOps methods to the inner X86Ops instance
399-
//#[rustversion::since(1.89)]
399+
#[rustversion::since(1.89)]
400400
impl ArchOps for Vpclmulqdq512Ops {
401401
type Vector = __m128i;
402402

src/arch/x86.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -227,9 +227,8 @@ impl ArchOps for X86Ops {
227227
_mm_clmulepi64_si128(a, b, 0x11)
228228
}
229229

230-
//#[rustversion::since(1.89)]
230+
#[rustversion::since(1.89)]
231231
#[inline]
232-
#[cfg(feature = "vpclmulqdq")]
233232
#[target_feature(enable = "avx512f,avx512vl")]
234233
unsafe fn xor3_vectors(
235234
&self,
@@ -244,9 +243,8 @@ impl ArchOps for X86Ops {
244243
self.xor3_vectors_sse(a, b, c)
245244
}
246245

247-
//#[rustversion::before(1.89)]
246+
#[rustversion::before(1.89)]
248247
#[inline]
249-
#[cfg(not(feature = "vpclmulqdq"))]
250248
#[target_feature(enable = "sse4.1")]
251249
unsafe fn xor3_vectors(
252250
&self,
@@ -321,9 +319,8 @@ impl X86Ops {
321319
}
322320
}
323321

324-
//#[rustversion::since(1.89)]
322+
#[rustversion::since(1.89)]
325323
#[inline]
326-
#[cfg(feature = "vpclmulqdq")]
327324
#[target_feature(enable = "avx512f,avx512vl")]
328325
unsafe fn xor3_vectors_avx512(&self, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
329326
_mm_ternarylogic_epi64(

0 commit comments

Comments
 (0)