Skip to content

Commit 433ed82

Browse files
committed
chore: ignore duplicate files when unzipping
1 parent a079deb commit 433ed82

File tree

3 files changed

+96
-6
lines changed

3 files changed

+96
-6
lines changed

collab/src/importer/zip_tool/async_zip.rs

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ use crate::importer::zip_tool::util::{
2424
has_multi_part_extension, has_multi_part_suffix, is_multi_part_zip_signature, remove_part_suffix,
2525
sanitize_file_path,
2626
};
27-
use tracing::{error, warn};
27+
use tracing::{error, trace, warn};
2828

2929
pub struct UnzipFile {
3030
pub file_name: String,
@@ -239,9 +239,17 @@ pub async fn unzip_single_file(
239239
create_dir_all(parent).await?;
240240
}
241241
}
242+
if path.exists() {
243+
trace!(
244+
"File {:?} already exists when extracting multipart entry (async); overwriting",
245+
path
246+
);
247+
}
248+
242249
let writer = OpenOptions::new()
243250
.write(true)
244-
.create_new(true)
251+
.create(true)
252+
.truncate(true)
245253
.open(&path)
246254
.await?;
247255
io::copy(&mut entry_reader, &mut writer.compat_write()).await?;

collab/src/importer/zip_tool/sync_zip.rs

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -69,13 +69,21 @@ pub fn sync_unzip(
6969
}
7070

7171
// Create and write the file
72+
if output_path.exists() {
73+
trace!(
74+
"File {:?} already exists; overwriting extracted content",
75+
output_path
76+
);
77+
}
78+
7279
match OpenOptions::new()
7380
.write(true)
74-
.create_new(true)
81+
.create(true)
82+
.truncate(true)
7583
.open(&output_path)
7684
.map_err(|e| {
7785
CollabError::Internal(anyhow!(
78-
"Failed to create or open file with path: {:?}, error: {:?}",
86+
"Failed to create or overwrite file with path: {:?}, error: {:?}",
7987
output_path,
8088
e
8189
))
@@ -207,13 +215,21 @@ fn unzip_single_file(
207215
}
208216

209217
// Create and write the file
218+
if path.exists() {
219+
trace!(
220+
"File {:?} already exists when extracting multipart entry; overwriting",
221+
path
222+
);
223+
}
224+
210225
let mut outfile = OpenOptions::new()
211226
.write(true)
212-
.create_new(true)
227+
.create(true)
228+
.truncate(true)
213229
.open(&path)
214230
.map_err(|e| {
215231
CollabError::Internal(anyhow!(
216-
"Failed to create part file: {:?}, path:{:?}",
232+
"Failed to create or overwrite part file: {:?}, path:{:?}",
217233
e,
218234
path
219235
))

collab/tests/importer/zip_tool.rs

Lines changed: 66 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,28 @@ fn sync_unzip_falls_back_when_root_directory_missing() -> Result<()> {
5858
Ok(())
5959
}
6060

61+
#[test]
62+
fn sync_unzip_overwrites_duplicate_files() -> Result<()> {
63+
let temp = tempdir()?;
64+
let zip_path = temp.path().join("duplicate_files.zip");
65+
create_zip_with_duplicate_file(&zip_path)?;
66+
67+
let output_dir = temp.path().join("output");
68+
std::fs::create_dir_all(&output_dir)?;
69+
70+
let unzip_file = sync_unzip(
71+
zip_path.clone(),
72+
output_dir.clone(),
73+
Some(FALLBACK_DIR.to_string()),
74+
)?;
75+
76+
let duplicated_file = unzip_file.unzip_dir.join("duplicate.csv");
77+
let content = std::fs::read_to_string(duplicated_file)?;
78+
assert_eq!(content, "second");
79+
80+
Ok(())
81+
}
82+
6183
#[tokio::test]
6284
async fn async_unzip_preserves_root_directory_with_nested_zip() -> Result<()> {
6385
let temp = tempdir()?;
@@ -121,6 +143,33 @@ async fn async_unzip_falls_back_when_root_directory_missing() -> Result<()> {
121143
Ok(())
122144
}
123145

146+
#[tokio::test]
147+
async fn async_unzip_overwrites_duplicate_files() -> Result<()> {
148+
let temp = tempdir()?;
149+
let zip_path = temp.path().join("async_duplicate_files.zip");
150+
create_zip_with_duplicate_file(&zip_path)?;
151+
152+
let file = tokio::fs::File::open(&zip_path).await?;
153+
let reader = tokio::io::BufReader::new(file).compat();
154+
let zip_reader = async_zip::base::read::stream::ZipFileReader::new(reader);
155+
156+
let output_dir = temp.path().join("async_output_duplicate");
157+
tokio::fs::create_dir_all(&output_dir).await?;
158+
159+
let unzip_file = async_unzip(
160+
zip_reader,
161+
output_dir.clone(),
162+
Some(FALLBACK_DIR.to_string()),
163+
)
164+
.await?;
165+
166+
let duplicated_file = unzip_file.unzip_dir_path.join("duplicate.csv");
167+
let content = tokio::fs::read_to_string(duplicated_file).await?;
168+
assert_eq!(content, "second");
169+
170+
Ok(())
171+
}
172+
124173
fn create_zip_with_root_dir(zip_path: &Path) -> Result<()> {
125174
let file = std::fs::File::create(zip_path)?;
126175
let mut writer = ZipWriter::new(file);
@@ -152,6 +201,23 @@ fn create_zip_without_root_dir(zip_path: &Path) -> Result<()> {
152201
Ok(())
153202
}
154203

204+
fn create_zip_with_duplicate_file(zip_path: &Path) -> Result<()> {
205+
let file = std::fs::File::create(zip_path)?;
206+
let mut writer = ZipWriter::new(file);
207+
let options = FileOptions::default().compression_method(CompressionMethod::Stored);
208+
209+
writer.add_directory(format!("{ROOT_DIR}/"), options)?;
210+
211+
writer.start_file(format!("{ROOT_DIR}/duplicate.csv"), options)?;
212+
writer.write_all(b"first")?;
213+
214+
writer.start_file(format!("{ROOT_DIR}/duplicate.csv"), options)?;
215+
writer.write_all(b"second")?;
216+
217+
writer.finish()?;
218+
Ok(())
219+
}
220+
155221
fn create_nested_zip_bytes(name: &str) -> Vec<u8> {
156222
let cursor = Cursor::new(Vec::new());
157223
let mut nested_writer = ZipWriter::new(cursor);

0 commit comments

Comments
 (0)