Skip to content

Commit 0c32f9d

Browse files
committed
cksum: Use crc_fast::Digest on the fly to improve performance
1 parent b3d71e3 commit 0c32f9d

File tree

2 files changed

+46
-52
lines changed

2 files changed

+46
-52
lines changed

src/uucore/src/lib/features/checksum.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1155,7 +1155,7 @@ where
11551155

11561156
pub fn digest_reader<T: Read>(
11571157
digest: &mut Box<dyn Digest>,
1158-
reader: &mut BufReader<T>,
1158+
reader: &mut T,
11591159
binary: bool,
11601160
output_bits: usize,
11611161
) -> io::Result<(String, usize)> {

src/uucore/src/lib/features/sum.rs

Lines changed: 45 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,6 @@ use hex::encode;
1818
#[cfg(windows)]
1919
use memchr::memmem;
2020

21-
use crc_fast::{CrcParams, checksum_with_params};
22-
23-
// POSIX cksum SIMD configuration for crc-fast
24-
// This uses SIMD instructions (PCLMULQDQ) for fast CRC computation
25-
fn get_posix_cksum_params() -> CrcParams {
26-
CrcParams::new(
27-
"CRC-32/CKSUM", // Name
28-
32, // Width
29-
0x04c11db7, // Polynomial
30-
0x00000000, // Initial CRC value: 0 (not 0xffffffff)
31-
false, // No input reflection (refin)
32-
0xffffffff, // XOR output with 0xffffffff (xorout)
33-
0, // Check value (not used)
34-
)
35-
}
36-
3721
pub trait Digest {
3822
fn new() -> Self
3923
where
@@ -139,50 +123,60 @@ impl Digest for Sm3 {
139123
}
140124

141125
pub struct Crc {
142-
state: u32,
126+
digest: crc_fast::Digest,
143127
size: usize,
144-
// Store data for SIMD processing
145-
data_buffer: Vec<u8>,
128+
}
129+
130+
impl Crc {
131+
/// POSIX cksum SIMD configuration for crc-fast
132+
/// This uses SIMD instructions (PCLMULQDQ) for fast CRC computation
133+
fn get_posix_cksum_params() -> crc_fast::CrcParams {
134+
crc_fast::CrcParams::new(
135+
"CRC-32/CKSUM", // Name
136+
32, // Width
137+
0x04c11db7, // Polynomial
138+
0x00000000, // Initial CRC value: 0 (not 0xffffffff)
139+
false, // No input reflection (refin)
140+
0xffffffff, // XOR output with 0xffffffff (xorout)
141+
0, // Check value (not used)
142+
)
143+
}
146144
}
147145

148146
impl Digest for Crc {
149147
fn new() -> Self {
150148
Self {
151-
state: 0,
149+
digest: crc_fast::Digest::new_with_params(Self::get_posix_cksum_params()),
152150
size: 0,
153-
data_buffer: Vec::with_capacity(8192),
154151
}
155152
}
156153

157154
fn hash_update(&mut self, input: &[u8]) {
155+
self.digest.update(input);
158156
self.size += input.len();
159-
// Store data for SIMD processing
160-
self.data_buffer.extend_from_slice(input);
161157
}
162158

163159
fn hash_finalize(&mut self, out: &mut [u8]) {
164-
// Add the size bytes to the data buffer
160+
// Feed the input length into the digest, least-significant byte first.
165161
let mut sz = self.size;
166-
while sz != 0 {
167-
self.data_buffer.push(sz as u8);
162+
while sz > 0 {
163+
self.digest.update(&[sz as u8]);
168164
sz >>= 8;
169165
}
170166

171-
// Use SIMD-accelerated CRC computation
172-
self.state = checksum_with_params(get_posix_cksum_params(), &self.data_buffer) as u32;
173-
out.copy_from_slice(&self.state.to_ne_bytes());
167+
let xout = self.digest.finalize();
168+
out.copy_from_slice(&xout.to_ne_bytes());
174169
}
175170

176171
fn result_str(&mut self) -> String {
177-
let mut _out: Vec<u8> = vec![0; 4];
178-
self.hash_finalize(&mut _out);
179-
format!("{}", self.state)
172+
let mut out: [u8; 8] = [0; 8];
173+
self.hash_finalize(&mut out);
174+
u64::from_ne_bytes(out).to_string()
180175
}
181176

182177
fn reset(&mut self) {
183-
self.state = 0;
178+
self.digest.reset();
184179
self.size = 0;
185-
self.data_buffer.clear();
186180
}
187181

188182
fn output_bits(&self) -> usize {
@@ -525,8 +519,8 @@ mod tests {
525519
crc1.hash_update(b"test");
526520
crc2.hash_update(b"test");
527521

528-
let mut out1 = [0u8; 4];
529-
let mut out2 = [0u8; 4];
522+
let mut out1 = [0u8; 8];
523+
let mut out2 = [0u8; 8];
530524
crc1.hash_finalize(&mut out1);
531525
crc2.hash_finalize(&mut out2);
532526

@@ -538,15 +532,15 @@ mod tests {
538532
let mut crc = Crc::new();
539533

540534
// Test empty input
541-
let mut output = [0u8; 4];
535+
let mut output = [0u8; 8];
542536
crc.hash_finalize(&mut output);
543-
let empty_result = u32::from_ne_bytes(output);
537+
let empty_result = u64::from_ne_bytes(output);
544538

545539
// Reset and test with "test" string
546-
crc.reset();
540+
let mut crc = Crc::new();
547541
crc.hash_update(b"test");
548542
crc.hash_finalize(&mut output);
549-
let test_result = u32::from_ne_bytes(output);
543+
let test_result = u64::from_ne_bytes(output);
550544

551545
// The result should be different for different inputs
552546
assert_ne!(empty_result, test_result);
@@ -565,14 +559,14 @@ mod tests {
565559

566560
// Process all at once
567561
crc1.hash_update(data);
568-
let mut output1 = [0u8; 4];
562+
let mut output1 = [0u8; 8];
569563
crc1.hash_finalize(&mut output1);
570564

571565
// Process in chunks
572566
crc2.hash_update(&data[0..10]);
573567
crc2.hash_update(&data[10..30]);
574568
crc2.hash_update(&data[30..]);
575-
let mut output2 = [0u8; 4];
569+
let mut output2 = [0u8; 8];
576570
crc2.hash_finalize(&mut output2);
577571

578572
assert_eq!(output1, output2);
@@ -585,15 +579,15 @@ mod tests {
585579

586580
let mut crc_optimized = Crc::new();
587581
crc_optimized.hash_update(test_data);
588-
let mut output_opt = [0u8; 4];
582+
let mut output_opt = [0u8; 8];
589583
crc_optimized.hash_finalize(&mut output_opt);
590584

591585
// Create a reference implementation using hash_update
592586
let mut crc_reference = Crc::new();
593587
for &byte in test_data {
594588
crc_reference.hash_update(&[byte]);
595589
}
596-
let mut output_ref = [0u8; 4];
590+
let mut output_ref = [0u8; 8];
597591
crc_reference.hash_finalize(&mut output_ref);
598592

599593
assert_eq!(output_opt, output_ref);
@@ -604,17 +598,17 @@ mod tests {
604598
// Test against our CRC implementation values
605599
// Note: These are the correct values for our POSIX cksum implementation
606600
let test_cases = [
607-
("", 4294967295u32),
608-
("a", 1220704766u32),
609-
("abc", 1219131554u32),
601+
("", 4294967295_u64),
602+
("a", 1220704766_u64),
603+
("abc", 1219131554_u64),
610604
];
611605

612606
for (input, expected) in test_cases {
613607
let mut crc = Crc::new();
614608
crc.hash_update(input.as_bytes());
615-
let mut output = [0u8; 4];
609+
let mut output = [0u8; 8];
616610
crc.hash_finalize(&mut output);
617-
let result = u32::from_ne_bytes(output);
611+
let result = u64::from_ne_bytes(output);
618612

619613
assert_eq!(result, expected, "CRC mismatch for input: '{}'", input);
620614
}
@@ -633,8 +627,8 @@ mod tests {
633627
crc2.hash_update(data9);
634628

635629
// Should not panic and should produce valid results
636-
let mut out1 = [0u8; 4];
637-
let mut out2 = [0u8; 4];
630+
let mut out1 = [0u8; 8];
631+
let mut out2 = [0u8; 8];
638632
crc.hash_finalize(&mut out1);
639633
crc2.hash_finalize(&mut out2);
640634

0 commit comments

Comments
 (0)