Skip to content

Commit 0f22b66

Browse files
committed
Fine-tune inline target_feature attributes
Yields an 8-25% performance boost for “small” (1 KiB) payloads: ~25% on Intel Sapphire Rapids, ~15% on AMD Genoa, ~12% on Apple M3 Ultra, and ~8% on AWS Graviton 4.
1 parent 6553e2a commit 0f22b66

File tree

3 files changed

+15
-1
lines changed

3 files changed

+15
-1
lines changed

src/algorithm.rs

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ use crate::{crc32, crc64};
2929
)]
3030
#[cfg_attr(
3131
all(target_arch = "x86_64", feature = "vpclmulqdq"),
32-
target_feature(enable = "sse2,sse4.1,pclmulqdq,avx2,vpclmulqdq")
32+
target_feature(enable = "avx2,vpclmulqdq,avx512f,avx512vl")
3333
)]
3434
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon,aes"))]
3535
pub unsafe fn update<T: ArchOps, W: EnhancedCrcWidth>(
@@ -120,6 +120,10 @@ where
120120
any(target_arch = "x86", target_arch = "x86_64"),
121121
target_feature(enable = "sse2,sse4.1,pclmulqdq")
122122
)]
123+
#[cfg_attr(
124+
all(target_arch = "x86_64", feature = "vpclmulqdq"),
125+
target_feature(enable = "avx2,vpclmulqdq,avx512f,avx512vl")
126+
)]
123127
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon,aes"))]
124128
unsafe fn process_large_aligned<T: ArchOps, W: EnhancedCrcWidth>(
125129
bytes: &[u8],

src/arch/mod.rs

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,15 @@ pub(crate) mod x86;
3333
/// # Safety
3434
/// May use native CPU features
3535
#[inline]
36+
#[cfg_attr(
37+
any(target_arch = "x86", target_arch = "x86_64"),
38+
target_feature(enable = "sse2,sse4.1,pclmulqdq")
39+
)]
40+
#[cfg_attr(
41+
all(target_arch = "x86_64", feature = "vpclmulqdq"),
42+
target_feature(enable = "avx2,vpclmulqdq,avx512f,avx512vl")
43+
)]
44+
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon,aes"))]
3645
pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64 {
3746
#[cfg(target_arch = "aarch64")]
3847
{

src/structs.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ pub struct CrcState<T> {
4747
pub(crate) struct Calculator {}
4848

4949
impl CrcCalculator for Calculator {
50+
#[inline(always)]
5051
fn calculate(state: u64, data: &[u8], params: CrcParams) -> u64 {
5152
unsafe { arch::update(state, data, params) }
5253
}

0 commit comments

Comments
 (0)