Skip to content

Commit 5f7d705

Browse files
committed
fix: decompress concatenated gzip, bzip2, and lz4 streams
Fixes #855. Previously, ouch would only decompress the first frame/stream of concatenated compressed files. Concatenation is a valid format for gzip, bzip2, and lz4: multiple compressed streams are simply appended one after another. Changes: (1) use MultiGzDecoder instead of GzDecoder for gzip; (2) use MultiBzDecoder instead of BzDecoder for bzip2; (3) add a MultiFrameLz4Decoder wrapper to handle concatenated lz4 frames; (4) add integration tests for concatenated stream decompression.
1 parent 1920a65 commit 5f7d705

File tree

6 files changed

+170
-9
lines changed

6 files changed

+170
-9
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ Categories Used:
4444

4545
### Bug Fixes
4646

47+
- Fix decompression of concatenated gzip, bzip2, and lz4 streams [\#855](https://github.com/ouch-org/ouch/issues/855)
4748
- Fix 7z BadSignature error when compressing and then listing [\#819](https://github.com/ouch-org/ouch/pull/819) ([tommady](https://github.com/tommady))
4849
- Fix tar extraction count when --quiet [\#824](https://github.com/ouch-org/ouch/pull/824) ([marcospb19](https://github.com/marcospb19))
4950
- Fix unpacking with merge flag failing without --dir flag [\#826](https://github.com/ouch-org/ouch/pull/826) ([tommady](https://github.com/tommady))

src/commands/decompress.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use crate::{
1818
info, info_accessible,
1919
utils::{
2020
self, io::lock_and_flush_output_stdio, is_path_stdin, nice_directory_display, set_permission_mode,
21-
user_wants_to_continue,
21+
user_wants_to_continue, MultiFrameLz4Decoder,
2222
},
2323
QuestionAction, QuestionPolicy, BUFFER_CAPACITY,
2424
};
@@ -123,16 +123,16 @@ pub fn decompress_file(options: DecompressOptions) -> crate::Result<()> {
123123
// Grab previous decoder and wrap it inside of a new one
124124
let chain_reader_decoder = |format: &CompressionFormat, decoder: Box<dyn Read>| -> crate::Result<Box<dyn Read>> {
125125
let decoder: Box<dyn Read> = match format {
126-
Gzip => Box::new(flate2::read::GzDecoder::new(decoder)),
127-
Bzip => Box::new(bzip2::read::BzDecoder::new(decoder)),
126+
Gzip => Box::new(flate2::read::MultiGzDecoder::new(decoder)),
127+
Bzip => Box::new(bzip2::read::MultiBzDecoder::new(decoder)),
128128
Bzip3 => {
129129
#[cfg(not(feature = "bzip3"))]
130130
return Err(archive::bzip3_stub::no_support());
131131

132132
#[cfg(feature = "bzip3")]
133133
Box::new(bzip3::read::Bz3Decoder::new(decoder)?)
134134
}
135-
Lz4 => Box::new(lz4_flex::frame::FrameDecoder::new(decoder)),
135+
Lz4 => Box::new(MultiFrameLz4Decoder::new(decoder)?),
136136
Lzma => Box::new(lzma_rust2::LzmaReader::new_mem_limit(decoder, u32::MAX, None)?),
137137
Xz => Box::new(lzma_rust2::XzReader::new(decoder, true)),
138138
Lzip => Box::new(lzma_rust2::LzipReader::new(decoder)?),

src/commands/list.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use crate::{
1010
commands::warn_user_about_loading_zip_in_memory,
1111
extension::CompressionFormat::{self, *},
1212
list::{self, FileInArchive, ListOptions},
13-
utils::{io::lock_and_flush_output_stdio, user_wants_to_continue},
13+
utils::{io::lock_and_flush_output_stdio, user_wants_to_continue, MultiFrameLz4Decoder},
1414
QuestionAction, QuestionPolicy, BUFFER_CAPACITY,
1515
};
1616

@@ -47,16 +47,16 @@ pub fn list_archive_contents(
4747
let chain_reader_decoder =
4848
|format: CompressionFormat, decoder: Box<dyn Read + Send>| -> crate::Result<Box<dyn Read + Send>> {
4949
let decoder: Box<dyn Read + Send> = match format {
50-
Gzip => Box::new(flate2::read::GzDecoder::new(decoder)),
51-
Bzip => Box::new(bzip2::read::BzDecoder::new(decoder)),
50+
Gzip => Box::new(flate2::read::MultiGzDecoder::new(decoder)),
51+
Bzip => Box::new(bzip2::read::MultiBzDecoder::new(decoder)),
5252
Bzip3 => {
5353
#[cfg(not(feature = "bzip3"))]
5454
return Err(archive::bzip3_stub::no_support());
5555

5656
#[cfg(feature = "bzip3")]
5757
Box::new(bzip3::read::Bz3Decoder::new(decoder).unwrap())
5858
}
59-
Lz4 => Box::new(lz4_flex::frame::FrameDecoder::new(decoder)),
59+
Lz4 => Box::new(MultiFrameLz4Decoder::new(decoder)?),
6060
Lzma => Box::new(lzma_rust2::LzmaReader::new_mem_limit(decoder, u32::MAX, None)?),
6161
Xz => Box::new(lzma_rust2::XzReader::new(decoder, true)),
6262
Lzip => Box::new(lzma_rust2::LzipReader::new(decoder)?),

src/utils/io.rs

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::io::{self, stderr, stdout, StderrLock, StdoutLock, Write};
1+
use std::io::{self, stderr, stdout, Read, StderrLock, StdoutLock, Write};
22

33
use crate::utils::logger;
44

@@ -14,3 +14,77 @@ pub fn lock_and_flush_output_stdio() -> io::Result<StdioOutputLocks> {
1414

1515
Ok((stdout, stderr))
1616
}
17+
18+
/// A wrapper around lz4_flex::frame::FrameDecoder that handles concatenated lz4 frames.
19+
/// The standard FrameDecoder only reads a single frame and returns EOF.
20+
/// This wrapper continues reading subsequent frames until the underlying reader is exhausted.
21+
pub struct MultiFrameLz4Decoder {
22+
buffer: io::Cursor<Vec<u8>>,
23+
}
24+
25+
impl MultiFrameLz4Decoder {
26+
pub fn new(mut reader: impl Read) -> io::Result<Self> {
27+
// Decompress all concatenated frames into an in-memory buffer
28+
let mut output = Vec::new();
29+
let mut input_buffer = Vec::new();
30+
reader.read_to_end(&mut input_buffer)?;
31+
32+
let mut cursor = io::Cursor::new(input_buffer);
33+
34+
// LZ4 frame magic number (little-endian)
35+
const LZ4_MAGIC: [u8; 4] = [0x04, 0x22, 0x4D, 0x18];
36+
37+
loop {
38+
let pos = cursor.position() as usize;
39+
let remaining = cursor.get_ref().len() - pos;
40+
41+
if remaining == 0 {
42+
break;
43+
}
44+
45+
if remaining < 4 {
46+
return Err(io::Error::new(
47+
io::ErrorKind::UnexpectedEof,
48+
"Incomplete LZ4 frame header",
49+
));
50+
}
51+
52+
// Check for magic number
53+
let slice = &cursor.get_ref()[pos..pos + 4];
54+
if slice != LZ4_MAGIC {
55+
return Err(io::Error::new(
56+
io::ErrorKind::InvalidData,
57+
"Invalid LZ4 frame magic number",
58+
));
59+
}
60+
61+
// Create a decoder for this frame starting from current position
62+
cursor.set_position(pos as u64);
63+
let frame_reader = io::Cursor::new(&cursor.get_ref()[pos..]);
64+
let mut decoder = lz4_flex::frame::FrameDecoder::new(frame_reader);
65+
66+
// Read the frame
67+
let start_len = output.len();
68+
decoder.read_to_end(&mut output)?;
69+
70+
// Get how many bytes were consumed from the input
71+
let bytes_consumed = decoder.into_inner().position() as usize;
72+
cursor.set_position((pos + bytes_consumed) as u64);
73+
74+
// If no progress was made, break to avoid infinite loop
75+
if output.len() == start_len && bytes_consumed == 0 {
76+
break;
77+
}
78+
}
79+
80+
Ok(Self {
81+
buffer: io::Cursor::new(output),
82+
})
83+
}
84+
}
85+
86+
impl Read for MultiFrameLz4Decoder {
87+
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
88+
self.buffer.read(buf)
89+
}
90+
}

src/utils/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ mod file_visibility;
88
mod formatting;
99
mod fs;
1010
pub mod io;
11+
pub use io::MultiFrameLz4Decoder;
1112
pub mod logger;
1213
mod question;
1314
pub mod threads;

tests/integration.rs

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1209,3 +1209,88 @@ fn tar_hardlink_pack_and_unpack() {
12091209
assert_eq!(out_source_meta.ino(), out_link1_meta.ino());
12101210
assert_eq!(out_link1_meta.ino(), out_link2_meta.ino());
12111211
}
1212+
1213+
/// Helper function to test decompression of concatenated streams (issue #855).
1214+
/// Takes a file extension and a compression function that compresses a single chunk.
1215+
fn test_concatenated_streams(extension: &str, compress_chunk: impl Fn(&[u8]) -> Vec<u8>) {
1216+
use std::io::Write;
1217+
1218+
let temp_dir = tempdir().unwrap();
1219+
let root_path = temp_dir.path();
1220+
1221+
// Create content for three separate streams
1222+
let chunks: &[&[u8]] = &[
1223+
b"First stream content - this is stream 1\n",
1224+
b"Second stream content - this is stream 2\n",
1225+
b"Third stream content - this is stream 3\n",
1226+
];
1227+
1228+
// Create the concatenated file
1229+
let concatenated_path = root_path.join(format!("concatenated.{extension}"));
1230+
{
1231+
let mut file = fs::File::create(&concatenated_path).unwrap();
1232+
for chunk in chunks {
1233+
file.write_all(&compress_chunk(chunk)).unwrap();
1234+
}
1235+
}
1236+
1237+
// Decompress using ouch
1238+
crate::utils::cargo_bin()
1239+
.arg("decompress")
1240+
.arg(&concatenated_path)
1241+
.arg("-d")
1242+
.arg(root_path)
1243+
.arg("--yes")
1244+
.assert()
1245+
.success();
1246+
1247+
// Verify the output contains all streams
1248+
let output_path = root_path.join("concatenated");
1249+
let output_content = fs::read(&output_path).unwrap();
1250+
let expected_content: Vec<u8> = chunks.iter().flat_map(|c| c.iter().copied()).collect();
1251+
assert_eq!(
1252+
output_content, expected_content,
1253+
"Decompressed content should contain all concatenated {extension} streams"
1254+
);
1255+
}
1256+
1257+
/// Regression test for issue #855: every member of a multi-stream gzip file must be decompressed.
#[test]
fn decompress_concatenated_gzip_streams() {
    use std::io::Write;

    use flate2::{write::GzEncoder, Compression};

    // Compresses one chunk into a complete, standalone gzip stream
    let gzip_chunk = |data: &[u8]| -> Vec<u8> {
        let mut gz = GzEncoder::new(Vec::new(), Compression::default());
        gz.write_all(data).unwrap();
        gz.finish().unwrap()
    };

    test_concatenated_streams("gz", gzip_chunk);
}
1270+
1271+
/// Regression test related to issue #855: every stream of a multi-stream bzip2 file must be
/// decompressed.
#[test]
fn decompress_concatenated_bzip2_streams() {
    use std::io::Write;

    use bzip2::{write::BzEncoder, Compression};

    // Compresses one chunk into a complete, standalone bzip2 stream
    let bzip2_chunk = |data: &[u8]| -> Vec<u8> {
        let mut bz = BzEncoder::new(Vec::new(), Compression::default());
        bz.write_all(data).unwrap();
        bz.finish().unwrap()
    };

    test_concatenated_streams("bz2", bzip2_chunk);
}
1284+
1285+
/// Regression test related to issue #855: every frame of a multi-frame lz4 file must be
/// decompressed.
#[test]
fn decompress_concatenated_lz4_frames() {
    use std::io::Write;

    use lz4_flex::frame::FrameEncoder;

    // Compresses one chunk into a complete, standalone lz4 frame
    let lz4_chunk = |data: &[u8]| -> Vec<u8> {
        let mut frame = FrameEncoder::new(Vec::new());
        frame.write_all(data).unwrap();
        frame.finish().unwrap()
    };

    test_concatenated_streams("lz4", lz4_chunk);
}

0 commit comments

Comments
 (0)