From a8ddc33927cee73a4a91b9f2244eaa88e16aa862 Mon Sep 17 00:00:00 2001 From: hennickc Date: Thu, 5 Jun 2025 16:50:39 -0700 Subject: [PATCH 1/2] fix: Need to include zip64 extra field in central directory --- src/read.rs | 65 ++++++++------- src/write.rs | 12 ++- tests/append_near_4gb.rs | 171 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 216 insertions(+), 32 deletions(-) create mode 100644 tests/append_near_4gb.rs diff --git a/src/read.rs b/src/read.rs index d59874d89..142e52f61 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1345,13 +1345,7 @@ fn central_header_to_zip_file_inner( aes_extra_data_start: 0, extra_fields: Vec::new(), }; - match parse_extra_field(&mut result) { - Ok(stripped_extra_field) => { - result.extra_field = stripped_extra_field; - } - Err(ZipError::Io(..)) => {} - Err(e) => return Err(e), - } + parse_extra_field(&mut result)?; let aes_enabled = result.compression_method == CompressionMethod::AES; if aes_enabled && result.aes_mode.is_none() { @@ -1367,33 +1361,42 @@ fn central_header_to_zip_file_inner( Ok(result) } -pub(crate) fn parse_extra_field(file: &mut ZipFileData) -> ZipResult>>> { - let Some(ref extra_field) = file.extra_field else { - return Ok(None); - }; - let extra_field = extra_field.clone(); - let mut processed_extra_field = extra_field.clone(); - let len = extra_field.len(); - let mut reader = io::Cursor::new(&**extra_field); - - /* TODO: codify this structure into Zip64ExtraFieldBlock fields! */ - let mut position = reader.position() as usize; - while (position) < len { - let old_position = position; - let remove = parse_single_extra_field(file, &mut reader, position as u64, false)?; - position = reader.position() as usize; - if remove { - let remaining = len - (position - old_position); - if remaining == 0 { - return Ok(None); +pub(crate) fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> { + let mut extra_field = file.extra_field.clone(); + let mut central_extra_field = file.central_extra_field.clone(); + for field_group in [&mut extra_field, &mut central_extra_field] { + let Some(extra_field) = field_group else { + continue; + }; + let mut modified = false; + let mut processed_extra_field = vec![]; + let len = extra_field.len(); + let mut reader = io::Cursor::new(&**extra_field); + + let mut position = reader.position(); + while position < len as u64 { + let old_position = position; + let remove = parse_single_extra_field(file, &mut reader, position, false)?; + position = reader.position(); + if remove { + modified = true; + } else { + let field_len = (position - old_position) as usize; + let write_start = processed_extra_field.len(); + reader.seek(SeekFrom::Start(old_position))?; + processed_extra_field.extend_from_slice(&vec![0u8; field_len]); + reader.read_exact( + &mut processed_extra_field[write_start..(write_start + field_len)], + )?; } - let mut new_extra_field = Vec::with_capacity(remaining); - new_extra_field.extend_from_slice(&extra_field[0..old_position]); - new_extra_field.extend_from_slice(&extra_field[position..]); - processed_extra_field = Arc::new(new_extra_field); + } + if modified { + *field_group = Some(Arc::new(processed_extra_field)); } } - Ok(Some(processed_extra_field)) + file.extra_field = extra_field; + file.central_extra_field = central_extra_field; + Ok(()) } pub(crate) fn parse_single_extra_field( diff --git a/src/write.rs b/src/write.rs index 72a15e087..12bc6eda4 100644 --- a/src/write.rs +++ b/src/write.rs @@ -889,7 +889,7 @@ impl ZipWriter { fn start_entry( &mut self, name: S, - options: FileOptions, + mut options: FileOptions, raw_values: Option, ) -> ZipResult<()> { self.finish_file()?; @@ -901,6 +901,13 @@ impl ZipWriter { uncompressed_size: 0, }); + // Check if we're close to the 4GB boundary and force ZIP64 if needed + // This ensures we properly handle appending to files close to 4GB + if header_start > spec::ZIP64_BYTES_THR { + // Files that start on or past the 4GiB boundary are always ZIP64 + options.large_file = true; + } + let mut extra_data = match options.extended_options.extra_data() { Some(data) => data.to_vec(), None => vec![], @@ -1946,6 +1953,9 @@ fn write_central_directory_header(writer: &mut T, file: &ZipFileData) // file name writer.write_all(&file.file_name_raw)?; // extra field + if let Some(zip64_extra_field) = &file.zip64_extra_field_block() { + writer.write_all(&zip64_extra_field.serialize())?; + } if let Some(extra_field) = &file.extra_field { writer.write_all(extra_field)?; } diff --git a/tests/append_near_4gb.rs b/tests/append_near_4gb.rs new file mode 100644 index 000000000..13409e9f6 --- /dev/null +++ b/tests/append_near_4gb.rs @@ -0,0 +1,171 @@ +use std::{fs::File, io::Write}; +use tempfile::tempdir; +use zip::{write::SimpleFileOptions, ZipWriter}; + +fn write_data(w: &mut dyn Write, size: usize) { + let chunks = 1 << 20; // 1MB chunks + let mut written = 0; + let buf = vec![0x21; chunks]; + while written < size { + let to_write = (size - written).min(chunks); + w.write_all(&buf[..to_write]).unwrap(); + written += to_write; + } +} + +#[test] +fn test_append_near_4gb() { + let dir = tempdir().unwrap(); + let path = dir.path().join("large-then-small.zip"); + + // Create a new zip file with a large file close to 4GB + { + let file = File::create(&path).unwrap(); + let mut writer = ZipWriter::new(file); + + let opts = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + + writer.start_file_from_path("close_to_4gb", opts).unwrap(); + + // Write a file that's just under 4GB (4GB - 32KB) + let size = (4u64 << 30) - (1 << 15); + write_data(&mut writer, size as usize); + + // Add a small file + writer.start_file_from_path("small_file", opts).unwrap(); + write_data(&mut writer, 1024); + + writer.finish().unwrap(); + } + + // Now append to the zip file + { + let file = File::options().read(true).write(true).open(&path).unwrap(); + let mut writer = ZipWriter::new_append(file).unwrap(); + + let opts = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + + // Add another small file + writer.start_file_from_path("appended_file", opts).unwrap(); + write_data(&mut writer, 1024); + + writer.finish().unwrap(); + } + + // Verify the zip file is valid by reading it + { + let file = File::open(&path).unwrap(); + let mut archive = zip::ZipArchive::new(file).unwrap(); + + assert_eq!(archive.len(), 3); + assert!(!archive.has_overlapping_files().unwrap()); + assert!(archive.file_names().any(|name| name == "close_to_4gb")); + assert!(archive.file_names().any(|name| name == "small_file")); + assert!(archive.file_names().any(|name| name == "appended_file")); + } +} + +#[test] +fn test_append_near_4gb_with_1gb_files() { + let dir = tempdir().unwrap(); + let path = dir.path().join("large-then-small.zip"); + + // Create a new zip file with a large file close to 4GB + { + let file = File::create(&path).unwrap(); + let mut writer = ZipWriter::new(file); + + let opts = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + + for i in 0..=3 { + writer + .start_file_from_path(format!("close_to_4gb_{i}"), opts) + .unwrap(); + + // Write a file that's just under 1GB (1GB - 32KB) + let size = (1u64 << 30) - (1 << 15); + write_data(&mut writer, size as usize); + } + + // Add a small file + writer.start_file_from_path("small_file", opts).unwrap(); + write_data(&mut writer, 1024); + + writer.finish().unwrap(); + } + + // Now append to the zip file + { + let file = File::options().read(true).write(true).open(&path).unwrap(); + let mut writer = ZipWriter::new_append(file).unwrap(); + + let opts = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + + // Add another small file + writer.start_file_from_path("appended_file", opts).unwrap(); + write_data(&mut writer, 1024); + + writer.finish().unwrap(); + } + + // Verify the zip file is valid by reading it + { + let file = File::open(&path).unwrap(); + let mut archive = zip::ZipArchive::new(file).unwrap(); + + assert_eq!(archive.len(), 6); + assert!(!archive.has_overlapping_files().unwrap()); + assert!(archive.file_names().any(|name| name == "close_to_4gb_0")); + assert!(archive.file_names().any(|name| name == "close_to_4gb_1")); + assert!(archive.file_names().any(|name| name == "close_to_4gb_2")); + assert!(archive.file_names().any(|name| name == "close_to_4gb_3")); + assert!(archive.file_names().any(|name| name == "small_file")); + assert!(archive.file_names().any(|name| name == "appended_file")); + } +} + +// A smaller test that doesn't create a 4GB file but still tests the logic +#[test] +fn test_append_with_large_file_flag() { + let dir = tempdir().unwrap(); + let path = dir.path().join("test.zip"); + + // Create a new zip file + { + let file = File::create(&path).unwrap(); + let mut writer = ZipWriter::new(file); + + let opts = SimpleFileOptions::default() + .compression_method(zip::CompressionMethod::Stored) + .large_file(true); // Force ZIP64 format + + writer.start_file_from_path("file1", opts).unwrap(); + write_data(&mut writer, 1024); + + writer.finish().unwrap(); + } + + // Now append to the zip file + { + let file = File::options().read(true).write(true).open(&path).unwrap(); + let mut writer = ZipWriter::new_append(file).unwrap(); + + let opts = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + + // Add another file + writer.start_file_from_path("file2", opts).unwrap(); + write_data(&mut writer, 1024); + + writer.finish().unwrap(); + } + + // Verify the zip file is valid by reading it + { + let file = File::open(&path).unwrap(); + let archive = zip::ZipArchive::new(file).unwrap(); + + assert_eq!(archive.len(), 2); + assert!(archive.file_names().any(|name| name == "file1")); + assert!(archive.file_names().any(|name| name == "file2")); + } +} From 320f6f2620784b2316081bbd5a5572ba660b340a Mon Sep 17 00:00:00 2001 From: hennickc Date: Thu, 5 Jun 2025 17:34:22 -0700 Subject: [PATCH 2/2] test: Fix an issue where some tests passed even without the fix --- tests/append_near_4gb.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/append_near_4gb.rs b/tests/append_near_4gb.rs index 13409e9f6..3505f73d0 100644 --- a/tests/append_near_4gb.rs +++ b/tests/append_near_4gb.rs @@ -27,8 +27,8 @@ fn test_append_near_4gb() { writer.start_file_from_path("close_to_4gb", opts).unwrap(); - // Write a file that's just under 4GB (4GB - 32KB) - let size = (4u64 << 30) - (1 << 15); + // Write a file that's just under 4GB (4GB - 1 byte) + let size = u32::MAX; write_data(&mut writer, size as usize); // Add a small file @@ -70,7 +70,7 @@ fn test_append_near_4gb_with_1gb_files() { let dir = tempdir().unwrap(); let path = dir.path().join("large-then-small.zip"); - // Create a new zip file with a large file close to 4GB + // Create a new zip file with 4 files totaling 1GB { let file = File::create(&path).unwrap(); let mut writer = ZipWriter::new(file); @@ -82,8 +82,8 @@ fn test_append_near_4gb_with_1gb_files() { .start_file_from_path(format!("close_to_4gb_{i}"), opts) .unwrap(); - // Write a file that's just under 1GB (1GB - 32KB) - let size = (1u64 << 30) - (1 << 15); + // Write a file that's 1 GB + let size = 1u64 << 30; write_data(&mut writer, size as usize); }