Skip to content

Commit c2f30c7

Browse files
committed
Use 4x512-bit registers instead of 2x256-bit
Only for x86_64 CPUs supporting VPCLMULQDQ. Gated behind nightly builds (+nightly) with the “vpclmulqdq” feature flag enabled. Provides nearly a 2x boost in throughput: CRC-64/NVME now runs at ~96 GiB/s on Intel Sapphire Rapids (AWS EC2 c7i.metal-48xl), up from ~56 GiB/s.
1 parent 2e0fbff commit c2f30c7

File tree

4 files changed

+347
-210
lines changed

4 files changed

+347
-210
lines changed

src/algorithm.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,9 @@ where
145145
}
146146

147147
// try to use the enhanced SIMD implementation first, fall back to non-enhanced if necessary
148-
if !ops.process_enhanced_simd_blocks::<W>(state, first, rest, &reflector, keys) {
148+
if rest.is_empty()
149+
|| !ops.process_enhanced_simd_blocks::<W>(state, first, rest, &reflector, keys)
150+
{
149151
process_simd_chunks::<T, W>(state, first, rest, &reflector, keys, ops);
150152
}
151153

src/arch/mod.rs

Lines changed: 89 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,19 @@ use crate::arch::aarch64::AArch64Ops;
2020
use crate::arch::x86::X86Ops;
2121

2222
#[cfg(all(target_arch = "x86_64", feature = "vpclmulqdq"))]
23-
use crate::arch::vpclmulqdq::VpclmulqdqOps;
23+
use crate::arch::vpclmulqdq::Vpclmulqdq512Ops;
2424

25-
pub(crate) mod aarch64;
25+
mod aarch64;
2626
mod software;
2727
mod vpclmulqdq;
28-
pub(crate) mod x86;
28+
mod x86;
2929

3030
/// Main entry point that dispatches to the appropriate architecture
3131
///
3232
///
3333
/// # Safety
3434
/// May use native CPU features
35-
#[inline]
35+
#[inline(always)]
3636
pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64 {
3737
#[cfg(target_arch = "aarch64")]
3838
{
@@ -52,14 +52,16 @@ pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64
5252
use std::arch::is_x86_feature_detected;
5353

5454
if bytes.len() >= 256 && is_x86_feature_detected!("vpclmulqdq") {
55-
let ops = vpclmulqdq::VpclmulqdqOps::new();
55+
let ops = Vpclmulqdq512Ops::new();
5656

5757
return match params.width {
58-
64 => algorithm::update::<VpclmulqdqOps, Width64>(state, bytes, params, &ops),
59-
32 => {
60-
algorithm::update::<VpclmulqdqOps, Width32>(state as u32, bytes, params, &ops)
61-
as u64
62-
}
58+
64 => algorithm::update::<Vpclmulqdq512Ops, Width64>(state, bytes, params, &ops),
59+
32 => algorithm::update::<Vpclmulqdq512Ops, Width32>(
60+
state as u32,
61+
bytes,
62+
params,
63+
&ops,
64+
) as u64,
6365
_ => panic!("Unsupported CRC width: {}", params.width),
6466
};
6567
}
@@ -85,7 +87,11 @@ pub fn get_target() -> String {
8587
return "internal-aarch64-neon".to_string();
8688

8789
#[cfg(all(target_arch = "x86_64", feature = "vpclmulqdq"))]
88-
return "internal-x86_64-avx512-vpclmulqdq".to_string();
90+
{
91+
if is_x86_feature_detected!("vpclmulqdq") {
92+
return "internal-x86_64-avx512-vpclmulqdq".to_string();
93+
}
94+
}
8995

9096
#[allow(unreachable_code)]
9197
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
@@ -101,6 +107,7 @@ mod tests {
101107
use crate::crc32::consts::CRC32_BZIP2;
102108
use crate::crc64::consts::CRC64_NVME;
103109
use crate::test::consts::{TEST_256_BYTES_STRING, TEST_ALL_CONFIGS, TEST_CHECK_STRING};
110+
use crate::test::create_aligned_data;
104111
use rand::{rng, Rng};
105112

106113
#[test]
@@ -123,6 +130,76 @@ mod tests {
123130
}
124131
}
125132

133+
#[test]
134+
fn test_256_string() {
135+
for config in TEST_ALL_CONFIGS {
136+
let actual = unsafe {
137+
update(
138+
config.get_init(),
139+
&*create_aligned_data(TEST_256_BYTES_STRING),
140+
*config.get_params(),
141+
) ^ config.get_xorout()
142+
};
143+
144+
assert_eq!(
145+
actual,
146+
config.checksum_with_reference(TEST_256_BYTES_STRING),
147+
"Mismatch CRC, {}, expected {:#x}, got {:#x}",
148+
config.get_name(),
149+
config.get_check(),
150+
actual
151+
);
152+
}
153+
}
154+
155+
#[test]
156+
fn test_512_string() {
157+
let test_string = b"12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234561234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456";
158+
159+
for config in TEST_ALL_CONFIGS {
160+
let actual = unsafe {
161+
update(
162+
config.get_init(),
163+
&*create_aligned_data(test_string),
164+
*config.get_params(),
165+
) ^ config.get_xorout()
166+
};
167+
168+
assert_eq!(
169+
actual,
170+
config.checksum_with_reference(test_string),
171+
"Mismatch CRC, {}, expected {:#x}, got {:#x}",
172+
config.get_name(),
173+
config.get_check(),
174+
actual
175+
);
176+
}
177+
}
178+
179+
#[test]
180+
fn test_1024_string() {
181+
let test_string = b"1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345612345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234561234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456";
182+
183+
for config in TEST_ALL_CONFIGS {
184+
let actual = unsafe {
185+
update(
186+
config.get_init(),
187+
&*create_aligned_data(test_string),
188+
*config.get_params(),
189+
) ^ config.get_xorout()
190+
};
191+
192+
assert_eq!(
193+
actual,
194+
config.checksum_with_reference(test_string),
195+
"Mismatch CRC, {}, expected {:#x}, got {:#x}",
196+
config.get_name(),
197+
config.get_check(),
198+
actual
199+
);
200+
}
201+
}
202+
126203
// CRC-64/NVME is a special flower in that Rust's crc library doesn't support it yet, so we have
127204
// tested values to check against.
128205
#[test]
@@ -287,7 +364,7 @@ mod tests {
287364
assert_eq!(
288365
actual,
289366
expected,
290-
"\nFailed for {} with length {}\\nGot: {:016x}\nExpected: {:016x}",
367+
"\nFailed for {} with length {}\nGot: {:016x}\nExpected: {:016x}",
291368
config.get_name(),
292369
len,
293370
actual,

0 commit comments

Comments
 (0)