Skip to content

Commit d0c9b22

Browse files
committed
crc-fast-rust: fixing the potenial UB in the unsafe block (algorithm.rs); trying to read memory before slice. refactored (8->6 args) to fix 'too many arguments' warnings. added better docs
1 parent 7d5a104 commit d0c9b22

File tree

1 file changed

+46
-13
lines changed

1 file changed

+46
-13
lines changed

src/algorithm.rs

Lines changed: 46 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -400,8 +400,11 @@ where
400400

401401
// Use the shared function to handle the last two chunks
402402
let final_xmm7 = get_last_two_xmms::<T, W>(
403-
&data[CRC_CHUNK_SIZE..],
404-
remaining_len,
403+
DataRegion {
404+
full_data: data,
405+
offset: CRC_CHUNK_SIZE,
406+
remaining: remaining_len,
407+
},
405408
xmm7,
406409
keys,
407410
reflector,
@@ -461,8 +464,11 @@ where
461464
if remaining_len > 0 {
462465
// Use the shared get_last_two_xmms function to handle the remaining bytes
463466
xmm7 = get_last_two_xmms::<T, W>(
464-
&data[current_pos..],
465-
remaining_len,
467+
DataRegion {
468+
full_data: data,
469+
offset: current_pos,
470+
remaining: remaining_len,
471+
},
466472
xmm7,
467473
keys,
468474
reflector,
@@ -475,17 +481,35 @@ where
475481
W::perform_final_reduction(xmm7, state.reflected, keys, ops)
476482
}
477483

484+
/// Data region descriptor for overlapping SIMD reads in CRC processing
485+
struct DataRegion<'a> {
486+
full_data: &'a [u8],
487+
offset: usize,
488+
remaining: usize,
489+
}
490+
478491
/// Handle the last two chunks of data (for small inputs)
479492
/// This shared implementation works for both CRC-32 and CRC-64
493+
///
494+
/// # Safety
495+
///
496+
/// This function requires:
497+
/// - `region.full_data` must contain at least `region.offset + region.remaining` bytes
498+
/// - `region.offset` must be >= `CRC_CHUNK_SIZE` (16 bytes) to allow reading the overlapping region
499+
/// - `region.remaining` must be in range 1..=15 (less than one chunk)
500+
/// - The caller must ensure appropriate SIMD features are available (checked via target_feature)
501+
///
502+
/// The function performs an overlapping read: it loads 16 bytes starting from position
503+
/// `region.offset - CRC_CHUNK_SIZE + region.remaining` in the full buffer. This creates a vector
504+
/// containing both the tail of the previously processed chunk and the new remaining data.
480505
#[inline]
481506
#[cfg_attr(
482507
any(target_arch = "x86", target_arch = "x86_64"),
483508
target_feature(enable = "ssse3,sse4.1,pclmulqdq")
484509
)]
485510
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "aes"))]
486511
unsafe fn get_last_two_xmms<T: ArchOps, W: EnhancedCrcWidth>(
487-
data: &[u8],
488-
remaining_len: usize,
512+
region: DataRegion,
489513
current_state: T::Vector,
490514
keys: [u64; 23],
491515
reflector: &Reflector<T::Vector>,
@@ -495,20 +519,29 @@ unsafe fn get_last_two_xmms<T: ArchOps, W: EnhancedCrcWidth>(
495519
where
496520
T::Vector: Copy,
497521
{
522+
// Safety check: ensure we have enough data before the offset for the overlapping read
523+
debug_assert!(region.offset >= CRC_CHUNK_SIZE,
524+
"offset must be >= CRC_CHUNK_SIZE to allow overlapping read");
525+
debug_assert!(region.remaining > 0 && region.remaining < CRC_CHUNK_SIZE,
526+
"remaining must be 1..15 bytes");
527+
debug_assert!(region.offset + region.remaining <= region.full_data.len(),
528+
"offset + remaining must not exceed full_data length");
498529
// Create coefficient for folding operations
499530
let coefficient = W::create_coefficient(keys[2], keys[1], reflected, ops);
500531

501532
let const_mask = ops.set_all_bytes(0x80);
502533

503534
// Get table pointer and offset based on CRC width
504-
let (table_ptr, offset) = W::get_last_bytes_table_ptr(reflected, remaining_len);
535+
let (table_ptr, offset) = W::get_last_bytes_table_ptr(reflected, region.remaining);
505536

506537
if reflected {
507538
// For reflected mode (CRC-32r, CRC-64r)
508539

509-
// Load the remaining data
510-
// Special pointer arithmetic to match the original implementation
511-
let xmm1 = ops.load_bytes(data.as_ptr().sub(CRC_CHUNK_SIZE).add(remaining_len)); // DON: looks correct
540+
// Load the remaining data using an overlapping read
541+
// This loads 16 bytes starting from (offset - CRC_CHUNK_SIZE + remaining),
542+
// which includes the tail of the previous chunk plus the remaining new data
543+
let read_offset = region.offset - CRC_CHUNK_SIZE + region.remaining;
544+
let xmm1 = ops.load_bytes(region.full_data.as_ptr().add(read_offset));
512545

513546
// Load the shuffle mask
514547
let mut xmm0 = ops.load_bytes(table_ptr.add(offset));
@@ -549,9 +582,9 @@ where
549582
} else {
550583
// For non-reflected mode (CRC-32f, CRC-64f)
551584

552-
// Load the remaining data and apply reflection if needed
553-
let data_ptr = data.as_ptr().sub(CRC_CHUNK_SIZE).add(remaining_len);
554-
let mut xmm1 = ops.load_bytes(data_ptr);
585+
// Load the remaining data using an overlapping read (same as reflected mode)
586+
let read_offset = region.offset - CRC_CHUNK_SIZE + region.remaining;
587+
let mut xmm1 = ops.load_bytes(region.full_data.as_ptr().add(read_offset));
555588

556589
// Apply reflection if in forward mode
557590
if let Reflector::ForwardReflector { smask } = reflector {

0 commit comments

Comments
 (0)