Skip to content

Commit 089d65c

Browse files
authored
Reset checksum every frame (#101)
* checksum: Reset the hash for every frame * Ignore .idea folder * Add test
1 parent 396e100 commit 089d65c

File tree

2 files changed

+70
-1
lines changed

2 files changed

+70
-1
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@ Cargo.lock
66
/orig-zstd
77
fuzz_decodecorpus
88
perf.data*
9-
fuzz/corpus
9+
fuzz/corpus
10+
.idea

ruzstd/src/encoding/frame_compressor.rs

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,10 @@ impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
132132
// Clearing buffers to allow re-using of the compressor
133133
self.state.matcher.reset(self.compression_level);
134134
self.state.last_huff_table = None;
135+
#[cfg(feature = "hash")]
136+
{
137+
self.hasher = XxHash64::with_seed(0);
138+
}
135139
let source = self.uncompressed_data.as_mut().unwrap();
136140
let drain = self.compressed_data.as_mut().unwrap();
137141
// As the frame is compressed, it's stored here
@@ -363,6 +367,70 @@ mod tests {
363367
assert_eq!(mock_data, decoded);
364368
}
365369

370+
#[cfg(feature = "hash")]
371+
#[test]
372+
fn checksum_two_frames_reused_compressor() {
373+
// Compress the same data twice using the same compressor and verify that:
374+
// 1. The checksum written in each frame matches what the decoder calculates.
375+
// 2. The hasher is correctly reset between frames (no cross-contamination).
376+
// If the hasher were NOT reset, the second frame's calculated checksum
377+
// would differ from the one stored in the frame data, causing assert_eq to fail.
378+
let data: Vec<u8> = (0u8..=255).cycle().take(1024).collect();
379+
380+
let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
381+
382+
// --- Frame 1 ---
383+
let mut compressed1 = Vec::new();
384+
compressor.set_source(data.as_slice());
385+
compressor.set_drain(&mut compressed1);
386+
compressor.compress();
387+
388+
// --- Frame 2 (reuse the same compressor) ---
389+
let mut compressed2 = Vec::new();
390+
compressor.set_source(data.as_slice());
391+
compressor.set_drain(&mut compressed2);
392+
compressor.compress();
393+
394+
fn decode_and_collect(compressed: &[u8]) -> (Vec<u8>, Option<u32>, Option<u32>) {
395+
let mut decoder = FrameDecoder::new();
396+
let mut source = compressed;
397+
decoder.reset(&mut source).unwrap();
398+
while !decoder.is_finished() {
399+
decoder
400+
.decode_blocks(&mut source, crate::decoding::BlockDecodingStrategy::All)
401+
.unwrap();
402+
}
403+
let mut decoded = Vec::new();
404+
decoder.collect_to_writer(&mut decoded).unwrap();
405+
(
406+
decoded,
407+
decoder.get_checksum_from_data(),
408+
decoder.get_calculated_checksum(),
409+
)
410+
}
411+
412+
let (decoded1, chksum_from_data1, chksum_calculated1) = decode_and_collect(&compressed1);
413+
assert_eq!(decoded1, data, "frame 1: decoded data mismatch");
414+
assert_eq!(
415+
chksum_from_data1, chksum_calculated1,
416+
"frame 1: checksum mismatch"
417+
);
418+
419+
let (decoded2, chksum_from_data2, chksum_calculated2) = decode_and_collect(&compressed2);
420+
assert_eq!(decoded2, data, "frame 2: decoded data mismatch");
421+
assert_eq!(
422+
chksum_from_data2, chksum_calculated2,
423+
"frame 2: checksum mismatch"
424+
);
425+
426+
// Same data compressed twice must produce the same checksum.
427+
// If state leaked across frames, the checksum stored in the second frame would differ.
428+
assert_eq!(
429+
chksum_from_data1, chksum_from_data2,
430+
"frame 1 and frame 2 should have the same checksum (same data, hash must reset per frame)"
431+
);
432+
}
433+
366434
#[cfg(feature = "std")]
367435
#[test]
368436
fn fuzz_targets() {

0 commit comments

Comments
 (0)