@@ -186,8 +186,8 @@ impl Digest for Crc {
186186/// CRC32B (ISO 3309) implementation using crc_fast with SIMD optimization
187187///
188188/// Performance characteristics:
189- /// - AVX512 (>100 GiB/s): x86_64 with AVX512 support
190- /// - SSE: x86_64 without AVX512 (fallback)
189+ /// - AVX512 (>100 GiB/s): x86_64 with AVX512 support (optimized for 256+ byte chunks)
190+ /// - SSE: x86_64 without AVX512 (fallback with buffer batching )
191191/// - NEON: ARM64 with NEON support
192192/// - Software: Other architectures
193193///
@@ -197,10 +197,46 @@ impl Digest for Crc {
197197pub struct CRC32B {
198198 digest : crc_fast:: Digest ,
199199 /// Buffer for batch processing to improve cache efficiency
200+ /// Sized for optimal AVX512 performance (256+ bytes for SIMD)
200201 buffer : Vec < u8 > ,
202+ /// Detected SIMD capability for optimization
203+ #[ cfg( target_arch = "x86_64" ) ]
204+ has_avx512 : bool ,
201205}
202206
203207impl CRC32B {
/// Reports whether the CPU executing this process supports AVX512 Foundation
/// (`avx512f`).
///
/// Detection happens at runtime, so a binary compiled for baseline x86_64
/// still sees AVX512 when the host CPU provides it. When the crate is built
/// with `avx512f` enabled as a target feature the answer is `true`, the same
/// as a compile-time check would give.
///
/// # Returns
/// `true` if `avx512f` is available on the current CPU, `false` otherwise.
#[cfg(target_arch = "x86_64")]
fn detect_avx512() -> bool {
    // `cfg(target_feature = "avx512f")` only reflects the compiler flags used
    // for the build; the std macro also queries the running CPU.
    std::arch::is_x86_feature_detected!("avx512f")
}
220+
221+ /// Get optimal buffer size based on SIMD capabilities
222+ #[ cfg( target_arch = "x86_64" ) ]
223+ fn optimal_buffer_size ( & self ) -> usize {
224+ if self . has_avx512 {
225+ // AVX512 processes 256+ bytes efficiently
226+ // Use larger buffer to maximize throughput
227+ 65536 // 64KB for AVX512 optimization
228+ } else {
229+ // SSE processes smaller chunks
230+ // Use smaller buffer to avoid cache misses
231+ 8192 // 8KB for SSE fallback
232+ }
233+ }
234+
/// Returns the staging-buffer size, in bytes, on targets other than x86_64.
///
/// No SIMD capability flag is consulted here, so the size is a fixed
/// 8192 (8 KiB).
#[cfg(not(target_arch = "x86_64"))]
fn optimal_buffer_size(&self) -> usize {
    const DEFAULT_BUFFER_BYTES: usize = 8 * 1024;
    DEFAULT_BUFFER_BYTES
}
239+
204240 /// Flush buffered data to digest
205241 fn flush_buffer ( & mut self ) {
206242 if !self . buffer . is_empty ( ) {
@@ -212,18 +248,47 @@ impl CRC32B {
212248
213249impl Digest for CRC32B {
214250 fn new ( ) -> Self {
251+ #[ cfg( target_arch = "x86_64" ) ]
252+ let has_avx512 = Self :: detect_avx512 ( ) ;
253+
254+ let optimal_size = if cfg ! ( target_arch = "x86_64" ) {
255+ #[ cfg( target_arch = "x86_64" ) ]
256+ {
257+ if has_avx512 {
258+ 65536
259+ } else {
260+ 8192
261+ }
262+ }
263+ #[ cfg( not( target_arch = "x86_64" ) ) ]
264+ {
265+ 8192
266+ }
267+ } else {
268+ 8192
269+ } ;
270+
215271 Self {
216272 digest : crc_fast:: Digest :: new ( crc_fast:: CrcAlgorithm :: Crc32IsoHdlc ) ,
217- buffer : Vec :: with_capacity ( 8192 ) ,
273+ buffer : Vec :: with_capacity ( optimal_size) ,
274+ #[ cfg( target_arch = "x86_64" ) ]
275+ has_avx512,
218276 }
219277 }
220278
221279 fn hash_update ( & mut self , input : & [ u8 ] ) {
280+ #[ cfg( target_arch = "x86_64" ) ]
281+ let threshold = if self . has_avx512 { 256 } else { 4096 } ;
282+
283+ #[ cfg( not( target_arch = "x86_64" ) ) ]
284+ let threshold = 4096 ;
285+
222286 // For small inputs, buffer them for better cache efficiency
223287 // For large inputs, flush buffer and process directly
224- if input. len ( ) < 4096 {
288+ if input. len ( ) < threshold {
225289 self . buffer . extend_from_slice ( input) ;
226- if self . buffer . len ( ) >= 8192 {
290+ let max_buffer = self . optimal_buffer_size ( ) ;
291+ if self . buffer . len ( ) >= max_buffer {
227292 self . flush_buffer ( ) ;
228293 }
229294 } else {
0 commit comments