Skip to content

Commit 41b7600

Browse files
committed
Update benchmarks in README
Reflects the performance improvement from the wider AVX-512 registers.
1 parent c2f30c7 commit 41b7600

File tree

2 files changed

+34
-10
lines changed

2 files changed

+34
-10
lines changed

README.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -297,14 +297,14 @@ AKA `crc32` in many, but not all, implementations.
297297

298298
### CRC-64/NVME
299299

300-
| Arch | Brand | CPU | System | Target | 1KiB (GiB/s) | 1MiB (GiB/s) |
301-
|:--------|:------|:----------------|:--------------------------|:----------------|-------------:|-------------:|
302-
| x86_64 | Intel | Sapphire Rapids | EC2 c7i.metal-48xl | avx2_vpclmulqdq | ~17.0 | ~56.4 |
303-
| x86_64 | AMD | Genoa | EC2 c7a.metal-48xl | avx2_vpclmulqdq | ~17.3 | ~27.4 |
304-
| aarch64 | AWS | Graviton4 | EC2 c8g.metal-48xl | neon_pclmulqdq | ~16.3 | ~16.3 |
305-
| aarch64 | Apple | M3 Ultra | Mac Studio (32 core) | neon_pclmulqdq | ~44.0 | ~71.9 |
306-
| aarch64 | Apple | M4 Max | MacBook Pro 16" (16 core) | neon_pclmulqdq | ~40.3 | ~72.3 |
307-
| aarch64 | Apple | M2 Ultra | Mac Studio (24 core) | neon_pclmulqdq | ~39.3 | ~65.0 |
300+
| Arch | Brand | CPU | System | Target | 1KiB (GiB/s) | 1MiB (GiB/s) |
301+
|:--------|:------|:----------------|:--------------------------|:------------------|-------------:|-------------:|
302+
| x86_64 | Intel | Sapphire Rapids | EC2 c7i.metal-48xl | avx512_vpclmulqdq | ~20.3 | ~94.1 |
303+
| x86_64 | AMD | Genoa | EC2 c7a.metal-48xl | avx512_vpclmulqdq | ~18.3 | ~53.9 |
304+
| aarch64 | AWS | Graviton4 | EC2 c8g.metal-48xl | neon_pclmulqdq | ~16.3 | ~16.3 |
305+
| aarch64 | Apple | M3 Ultra | Mac Studio (32 core) | neon_pclmulqdq | ~44.0 | ~71.9 |
306+
| aarch64 | Apple | M4 Max | MacBook Pro 16" (16 core) | neon_pclmulqdq | ~40.3 | ~72.3 |
307+
| aarch64 | Apple | M2 Ultra | Mac Studio (24 core) | neon_pclmulqdq | ~39.3 | ~65.0 |
308308

309309
## Other CRC widths
310310

benches/benchmark.rs

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,28 @@ fn random_data(size: i32) -> Vec<u8> {
5151
buf
5252
}
5353

54+
/// Returns a new `Vec<u8>` containing a copy of `input`.
///
/// The bytes are first staged in a zero-filled scratch buffer (`padded`) at
/// the first offset whose address is a multiple of 64 bytes, and are then
/// copied out of that buffer into the vector that is returned.
///
/// NOTE(review): the final `.to_vec()` copies the staged bytes into a freshly
/// allocated vector. Nothing in this function constrains the address of that
/// new allocation, so the 64-byte alignment established inside `padded` does
/// not appear to carry over to the returned data -- confirm whether the
/// benchmarks depend on the result actually being aligned.
fn create_aligned_data(input: &[u8]) -> Vec<u8> {
55+
// Size of our target alignment structure: 2 * 4 * size_of::<u64>()
56+
let align_size = std::mem::size_of::<[[u64; 4]; 2]>(); // 64 bytes
57+
58+
// Create a vector with padding to ensure we can find a properly aligned position
59+
let mut padded = Vec::with_capacity(input.len() + align_size);
60+
61+
// Fill with zeros initially to reach needed capacity
// (the length becomes `input.len() + align_size`, matching the capacity requested above)
62+
padded.resize(input.len() + align_size, 0);
63+
64+
// Find the first address that satisfies our alignment.
// The outer `% align_size` maps an already-aligned start address to offset 0
// rather than `align_size`, so the offset is always in `0..align_size`.
65+
let start_addr = padded.as_ptr() as usize;
66+
let align_offset = (align_size - (start_addr % align_size)) % align_size;
67+
68+
// Copy the input into the aligned position.
// Because `align_offset < align_size`, the tail slice is always longer than
// `input.len()`, so the range index below cannot go out of bounds.
69+
let aligned_start = &mut padded[align_offset..];
70+
aligned_start[..input.len()].copy_from_slice(input);
71+
72+
// Return the exact slice we need.
// NOTE(review): `to_vec()` allocates a new vector and copies into it; the
// aligned region of `padded` itself is dropped when this function returns.
73+
aligned_start[..input.len()].to_vec()
74+
}
75+
5476
#[inline(always)]
5577
fn bench_crc32(c: &mut Criterion) {
5678
let mut group = c.benchmark_group("CRC-32");
@@ -65,7 +87,7 @@ fn bench_crc32(c: &mut Criterion) {
6587
);
6688

6789
for (size_name, size) in SIZES {
68-
let buf = random_data(*size);
90+
let buf = create_aligned_data(&*random_data(*size));
6991

7092
let (part1, rest) = buf.split_at(buf.len() / 4);
7193
let (part2, rest) = rest.split_at(rest.len() / 3);
@@ -108,7 +130,7 @@ fn bench_crc64(c: &mut Criterion) {
108130
let mut group = c.benchmark_group("CRC-64");
109131

110132
for (size_name, size) in SIZES {
111-
let buf = random_data(*size);
133+
let buf = create_aligned_data(&*random_data(*size));
112134

113135
let (part1, rest) = buf.split_at(buf.len() / 4);
114136
let (part2, rest) = rest.split_at(rest.len() / 3);
@@ -122,6 +144,8 @@ fn bench_crc64(c: &mut Criterion) {
122144

123145
group.throughput(Throughput::Bytes(*size as u64));
124146

147+
group.measurement_time(Duration::from_secs(60));
148+
125149
let bench_name = [alg_suffix.unwrap(), "(checksum)"].join(" ");
126150

127151
group.bench_function(BenchmarkId::new(bench_name, size_name), |b| {

0 commit comments

Comments
 (0)