Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ collab-plugins/.env.test

**/.DS_Store
.claude

.serena
92 changes: 44 additions & 48 deletions collab-importer/src/zip_tool/async_zip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use async_zip::{StringEncoding, ZipString};
use futures::AsyncReadExt as FuturesAsyncReadExt;
use futures::io::AsyncBufRead;
use std::ffi::OsString;
use std::{io, str};
use std::str;

use std::path::{Path, PathBuf};
use tokio::fs;
Expand All @@ -19,8 +19,11 @@ use tokio_util::compat::TokioAsyncReadCompatExt;
use tokio_util::compat::TokioAsyncWriteCompatExt;

use crate::error::ImporterError;
use crate::zip_tool::util::{is_multi_part_zip_signature, remove_part_suffix, sanitize_file_path};
use tracing::error;
use crate::zip_tool::util::{
has_multi_part_extension, has_multi_part_suffix, is_multi_part_zip_signature, remove_part_suffix,
sanitize_file_path,
};
use tracing::{error, warn};

pub struct UnzipFile {
pub file_name: String,
Expand Down Expand Up @@ -74,12 +77,20 @@ where
if buffer.len() >= 4 {
if let Ok(four_bytes) = buffer[..4].try_into() {
if is_multi_part_zip_signature(four_bytes) {
if let Some(file_name) =
Path::new(&filename).file_stem().and_then(|s| s.to_str())
{
root_dir = Some(remove_part_suffix(file_name));
let is_multipart_candidate = filename.contains('/')
|| has_multi_part_extension(&filename)
|| has_multi_part_suffix(&filename);

if root_dir.is_none() && is_multipart_candidate {
if let Some(file_name) =
Path::new(&filename).file_stem().and_then(|s| s.to_str())
{
root_dir = Some(remove_part_suffix(file_name));
}
}
if is_multipart_candidate {
parts.push(output_path.clone());
}
parts.push(output_path.clone());
}
}
}
Expand Down Expand Up @@ -130,54 +141,39 @@ where
None => match default_file_name {
None => Err(ImporterError::FileNotFound),
Some(default_file_name) => {
let new_out_dir = out_dir
.parent()
.ok_or_else(|| ImporterError::FileNotFound)?
.join(uuid::Uuid::new_v4().to_string())
.join(&default_file_name);
move_all(&out_dir, &new_out_dir).await?;
let _ = fs::remove_dir_all(&out_dir).await;
if fs::metadata(&out_dir).await.is_err() {
fs::create_dir_all(&out_dir)
.await
.with_context(|| format!("Failed to create output directory: {}", out_dir.display()))?;
}
Ok(UnzipFile {
file_name: default_file_name.clone(),
unzip_dir_path: new_out_dir,
unzip_dir_path: out_dir,
parts,
})
},
},
Some(file_name) => Ok(UnzipFile {
file_name: file_name.clone(),
unzip_dir_path: out_dir.join(file_name),
parts,
}),
}
}

#[async_recursion]
async fn move_all(old_path: &Path, new_path: &Path) -> io::Result<()> {
if !new_path.exists() {
fs::create_dir_all(new_path).await?;
}

let mut read_dir = fs::read_dir(old_path).await?;
while let Some(entry) = read_dir.next_entry().await? {
let path = entry.path();
let file_name = match path.file_name() {
Some(name) => name,
None => continue,
};

let new_file_path = new_path.join(file_name);
if path.is_dir() {
if !new_file_path.exists() {
fs::create_dir_all(&new_file_path).await?;
Some(file_name) => {
let target_dir = out_dir.join(&file_name);
if fs::metadata(&target_dir).await.is_err() {
warn!(
"Root directory {:?} missing after unzip; falling back to {:?}",
target_dir, out_dir
);
return Ok(UnzipFile {
file_name,
unzip_dir_path: out_dir,
parts,
});
}
move_all(&path, &new_file_path).await?;
fs::remove_dir_all(&path).await?;
} else if path.is_file() {
fs::rename(&path, &new_file_path).await?;
}

Ok(UnzipFile {
file_name: file_name.clone(),
unzip_dir_path: target_dir,
parts,
})
},
}
Ok(())
}

pub fn get_filename_from_zip_string(zip_string: &ZipString) -> Result<String, anyhow::Error> {
Expand Down
57 changes: 39 additions & 18 deletions collab-importer/src/zip_tool/sync_zip.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use crate::error::ImporterError;
use crate::zip_tool::util::{is_multi_part_zip_signature, remove_part_suffix, sanitize_file_path};
use crate::zip_tool::util::{
has_multi_part_extension, has_multi_part_suffix, is_multi_part_zip_signature, remove_part_suffix,
sanitize_file_path,
};
use anyhow::{Result, anyhow};

use std::fs::{File, OpenOptions};
Expand All @@ -17,7 +20,7 @@ pub struct UnzipFile {

pub fn sync_unzip(
file_path: PathBuf,
mut out_dir: PathBuf,
out_dir: PathBuf,
default_file_name: Option<String>,
) -> Result<UnzipFile, ImporterError> {
let file = File::open(file_path)
Expand All @@ -37,14 +40,9 @@ pub fn sync_unzip(
}
}

if root_dir.is_none() {
if let Some(default_name) = &default_file_name {
out_dir = out_dir.join(default_name);
if !out_dir.exists() {
fs::create_dir_all(&out_dir)
.map_err(|e| ImporterError::Internal(anyhow!("Failed to create dir: {:?}", e)))?;
}
}
if !out_dir.exists() {
fs::create_dir_all(&out_dir)
.map_err(|e| ImporterError::Internal(anyhow!("Failed to create dir: {:?}", e)))?;
}

// Iterate through each file in the archive
Expand Down Expand Up @@ -92,10 +90,18 @@ pub fn sync_unzip(
if buffer.len() >= 4 {
let four_bytes: [u8; 4] = buffer[..4].try_into().unwrap();
if is_multi_part_zip_signature(&four_bytes) {
if let Some(file_name) = Path::new(&filename).file_stem().and_then(|s| s.to_str()) {
root_dir = Some(remove_part_suffix(file_name));
let is_multipart_candidate = filename.contains('/')
|| has_multi_part_extension(&filename)
|| has_multi_part_suffix(&filename);

if root_dir.is_none() && is_multipart_candidate {
if let Some(file_name) = Path::new(&filename).file_stem().and_then(|s| s.to_str()) {
root_dir = Some(remove_part_suffix(file_name));
}
}
if is_multipart_candidate {
parts.push(output_path.clone());
}
parts.push(output_path.clone());
}
}

Expand Down Expand Up @@ -130,11 +136,26 @@ pub fn sync_unzip(
parts,
}),
},
Some(root_dir) => Ok(UnzipFile {
dir_name: root_dir.clone(),
unzip_dir: out_dir.join(root_dir),
parts,
}),
Some(root_dir) => {
let target_dir = out_dir.join(&root_dir);
if !target_dir.exists() {
warn!(
"Root directory {:?} missing after unzip; falling back to {:?}",
target_dir, out_dir
);
return Ok(UnzipFile {
dir_name: root_dir,
unzip_dir: out_dir,
parts,
});
}

Ok(UnzipFile {
dir_name: root_dir.clone(),
unzip_dir: target_dir,
parts,
})
},
}
}

Expand Down
42 changes: 42 additions & 0 deletions collab-importer/src/zip_tool/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,32 @@ pub fn sanitize_file_path(path: &str) -> PathBuf {
.collect()
}

/// Determine whether the provided file path ends with a known multi-part archive extension.
pub fn has_multi_part_extension(file_name: &str) -> bool {
Path::new(file_name)
.extension()
.and_then(|ext| ext.to_str())
.map(|ext| {
let ext = ext.to_ascii_lowercase();
(ext.starts_with('z') && ext.chars().skip(1).all(|c| c.is_ascii_digit()))
|| (ext.starts_with("part") && ext.chars().skip(4).all(|c| c.is_ascii_digit()))
})
.unwrap_or(false)
}

/// Identify multi-part style suffixes that appear before the file extension.
pub fn has_multi_part_suffix(file_name: &str) -> bool {
if let Some(file_name) = Path::new(file_name).file_name().and_then(|s| s.to_str()) {
let patterns = [r"(?i)-part-\d+", r"(?i)\.part\d+", r"(?i)\.z\d{2}"];
return patterns.iter().any(|pattern| {
let re = Regex::new(pattern).unwrap();
re.is_match(file_name).unwrap_or(false)
});
}

false
}

pub fn remove_part_suffix(file_name: &str) -> String {
let path = Path::new(file_name);
if let Some(stem) = path.file_stem() {
Expand Down Expand Up @@ -97,4 +123,20 @@ mod tests {
);
}
}

#[test]
fn test_has_multi_part_extension() {
assert!(has_multi_part_extension("Export-Part.z01"));
assert!(has_multi_part_extension("export.part1"));
assert!(!has_multi_part_extension("Attachment.zip"));
assert!(!has_multi_part_extension("regular.txt"));
}

#[test]
fn test_has_multi_part_suffix() {
assert!(has_multi_part_suffix("Export-Part-1.zip"));
assert!(has_multi_part_suffix("nested/Export.Part2"));
assert!(!has_multi_part_suffix("Attachment.zip"));
assert!(!has_multi_part_suffix("duplicate(1).zip"));
}
}
1 change: 1 addition & 0 deletions collab-importer/tests/main.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mod notion_test;
mod util;
mod workspace;
mod zip_tool;
Loading
Loading