Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 30 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use colored::*;
use ia_get::archive_metadata::{parse_xml_files, XmlFiles};
use ia_get::constants::USER_AGENT;
use ia_get::downloader;
use ia_get::utils::{create_spinner, validate_archive_url};
use ia_get::utils::{create_spinner, sanitize_filename, validate_archive_url};
use ia_get::Result;
use indicatif::ProgressStyle;
use reqwest::Client; // Add this line
Expand Down Expand Up @@ -178,6 +178,7 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
spinner.finish();

// Prepare download data for batch processing
let mut sanitized_count = 0;
let download_data = files
.files
.into_iter()
Expand All @@ -186,10 +187,37 @@ async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
if let Ok(joined_url) = absolute_url.join(&file.name) {
absolute_url = joined_url;
}
(absolute_url.to_string(), file.name, file.md5)

// Sanitize filename for filesystem compatibility
let (sanitized_name, was_modified) = sanitize_filename(&file.name);

// Warn user if filename was modified
if was_modified {
println!(
"{} {} {} → {}",
"⚠".yellow().bold(),
"Sanitized:".yellow(),
file.name.dimmed(),
sanitized_name.bold()
);
sanitized_count += 1;
}

(absolute_url.to_string(), sanitized_name, file.md5)
})
.collect::<Vec<_>>();

// Show summary if any files were sanitized
if sanitized_count > 0 {
println!(
"\n{} {} {} file{} for filesystem compatibility",
"✓".green().bold(),
"Sanitized".bold(),
sanitized_count.to_string().bold(),
if sanitized_count == 1 { "" } else { "s" }
);
}

// Download all files with integrated signal handling
downloader::download_files(&client, download_data.clone(), download_data.len()).await?;

Expand Down
324 changes: 324 additions & 0 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,327 @@ pub fn format_transfer_rate(bytes_per_sec: f64) -> (f64, &'static str) {
(bytes_per_sec / GB, "GB")
}
}

/// Sanitizes a filename for cross-platform filesystem compatibility
///
/// Replaces characters that are invalid on Windows or Unix filesystems
/// with underscores, while preserving path separators.
///
/// Invalid characters replaced with underscores:
/// - Windows: `< > : " | ? *` and control characters (0-31)
/// - Unix: null character (\0)
/// - Both: leading/trailing spaces, trailing dots in path components
///
/// Also handles Windows reserved names (CON, PRN, AUX, NUL, COM1-9, LPT1-9)
/// by appending an underscore.
///
/// # Arguments
/// * `filename` - The original filename (may include path components separated by `/`)
///
/// # Returns
/// * `(sanitized_filename, was_modified)` - Tuple of cleaned filename and whether it was changed
///
/// # Examples
/// ```
/// use ia_get::utils::sanitize_filename;
///
/// let (sanitized, modified) = sanitize_filename("normal_file.txt");
/// assert_eq!(sanitized, "normal_file.txt");
/// assert!(!modified);
///
/// let (sanitized, modified) = sanitize_filename("file?name.txt");
/// assert_eq!(sanitized, "file_name.txt");
/// assert!(modified);
///
/// let (sanitized, modified) = sanitize_filename("Season 1/Episode?.mp4");
/// assert_eq!(sanitized, "Season 1/Episode_.mp4");
/// assert!(modified);
/// ```
pub fn sanitize_filename(filename: &str) -> (String, bool) {
// Windows reserved names (case-insensitive)
const RESERVED_NAMES: &[&str] = &[
"CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8",
"COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
];

let mut was_modified = false;
let mut result = String::with_capacity(filename.len());

// Process each path component separately to preserve directory structure
let components: Vec<&str> = filename.split('/').collect();
let mut first_component = true;

for component in components.iter() {
// Skip empty components (e.g., from leading/trailing slashes or "//" sequences)
if component.is_empty() {
if !filename.is_empty() {
was_modified = true;
}
continue;
}

// Add separator before non-first components
if !first_component {
result.push('/');
}
first_component = false;

let mut sanitized_component = String::with_capacity(component.len());

// Replace invalid characters
for ch in component.chars() {
match ch {
// Windows invalid characters
'<' | '>' | ':' | '"' | '|' | '?' | '*' => {
sanitized_component.push('_');
was_modified = true;
}
// Backslash (path separator on Windows, invalid in filenames on Unix)
'\\' => {
sanitized_component.push('_');
was_modified = true;
}
// Control characters (0-31) and DEL (127)
'\x00'..='\x1F' | '\x7F' => {
sanitized_component.push('_');
was_modified = true;
}
// Valid character
_ => sanitized_component.push(ch),
}
}

// Trim leading/trailing spaces
let trimmed = sanitized_component.trim();
if trimmed.len() != sanitized_component.len() {
was_modified = true;
sanitized_component = trimmed.to_string();
}

// Trim trailing dots (Windows doesn't allow filenames ending with dots)
let trimmed_dots = sanitized_component.trim_end_matches('.');
if trimmed_dots.len() != sanitized_component.len() {
was_modified = true;
sanitized_component = trimmed_dots.to_string();
}

// Handle empty components after sanitization
if sanitized_component.is_empty() {
sanitized_component = "_".to_string();
was_modified = true;
}

// Check for Windows reserved names
// Split by '.' to check the base name (before extension)
let dot_pos = sanitized_component.find('.');
let base_name = if let Some(pos) = dot_pos {
&sanitized_component[..pos]
} else {
&sanitized_component
};

if RESERVED_NAMES
.iter()
.any(|&reserved| base_name.eq_ignore_ascii_case(reserved))
{
// Insert underscore after base name, before extension
if let Some(pos) = dot_pos {
sanitized_component.insert(pos, '_');
} else {
sanitized_component.push('_');
}
was_modified = true;
}

result.push_str(&sanitized_component);
}

// Remove trailing slash if present (unless it's just "/")
if result.len() > 1 && result.ends_with('/') {
result.pop();
was_modified = true;
}

// Check if result differs from original
if !was_modified {
was_modified = result != filename;
}

(result, was_modified)
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_sanitize_valid_filename() {
let (result, modified) = sanitize_filename("normal_file-name.txt");
assert_eq!(result, "normal_file-name.txt");
assert!(!modified);
}

#[test]
fn test_sanitize_valid_filename_with_path() {
let (result, modified) = sanitize_filename("folder/subfolder/file.txt");
assert_eq!(result, "folder/subfolder/file.txt");
assert!(!modified);
}

#[test]
fn test_sanitize_invalid_characters() {
let (result, modified) = sanitize_filename("file?name:test<>.txt");
assert_eq!(result, "file_name_test__.txt");
assert!(modified);
}

#[test]
fn test_sanitize_question_mark() {
let (result, modified) = sanitize_filename("Episode?.mp4");
assert_eq!(result, "Episode_.mp4");
assert!(modified);
}

#[test]
fn test_sanitize_with_path() {
let (result, modified) = sanitize_filename("Season 1/Episode?.mp4");
assert_eq!(result, "Season 1/Episode_.mp4");
assert!(modified);
}

#[test]
fn test_sanitize_multiple_invalid_in_path() {
let (result, modified) = sanitize_filename("Folder:Name/File*Name?.txt");
assert_eq!(result, "Folder_Name/File_Name_.txt");
assert!(modified);
}

#[test]
fn test_sanitize_windows_reserved_names() {
let (result, modified) = sanitize_filename("CON.txt");
assert_eq!(result, "CON_.txt");
assert!(modified);

let (result, modified) = sanitize_filename("con.txt");
assert_eq!(result, "con_.txt");
assert!(modified);

let (result, modified) = sanitize_filename("PRN");
assert_eq!(result, "PRN_");
assert!(modified);

let (result, modified) = sanitize_filename("aux.log");
assert_eq!(result, "aux_.log");
assert!(modified);

let (result, modified) = sanitize_filename("COM1.dat");
assert_eq!(result, "COM1_.dat");
assert!(modified);

let (result, modified) = sanitize_filename("LPT9.txt");
assert_eq!(result, "LPT9_.txt");
assert!(modified);
}

#[test]
fn test_sanitize_reserved_in_path() {
let (result, modified) = sanitize_filename("folder/CON.txt");
assert_eq!(result, "folder/CON_.txt");
assert!(modified);
}

#[test]
fn test_sanitize_control_characters() {
let (result, modified) = sanitize_filename("file\x00\x1fname.txt");
assert_eq!(result, "file__name.txt");
assert!(modified);

let (result, modified) = sanitize_filename("test\x7Ffile.txt");
assert_eq!(result, "test_file.txt");
assert!(modified);
}

#[test]
fn test_sanitize_backslash() {
let (result, modified) = sanitize_filename("folder\\file.txt");
assert_eq!(result, "folder_file.txt");
assert!(modified);
}

#[test]
fn test_sanitize_whitespace_edge_cases() {
let (result, modified) = sanitize_filename(" leading.txt ");
assert_eq!(result, "leading.txt");
assert!(modified);

let (result, modified) = sanitize_filename("folder/ spaces /file.txt");
assert_eq!(result, "folder/spaces/file.txt");
assert!(modified);
}

#[test]
fn test_sanitize_trailing_dots() {
let (result, modified) = sanitize_filename("file...");
assert_eq!(result, "file");
assert!(modified);

let (result, modified) = sanitize_filename("folder./file.txt");
assert_eq!(result, "folder/file.txt");
assert!(modified);
}

#[test]
fn test_sanitize_empty_components() {
let (result, modified) = sanitize_filename("folder//file.txt");
assert_eq!(result, "folder/file.txt");
assert!(modified);

let (result, modified) = sanitize_filename("/folder/file.txt");
assert_eq!(result, "folder/file.txt");
assert!(modified);

let (result, modified) = sanitize_filename("folder/file.txt/");
assert_eq!(result, "folder/file.txt");
assert!(modified);
}

#[test]
fn test_sanitize_all_invalid() {
let (result, modified) = sanitize_filename("???");
assert_eq!(result, "___");
assert!(modified);
}

#[test]
fn test_sanitize_unicode() {
let (result, modified) = sanitize_filename("файл.txt");
assert_eq!(result, "файл.txt");
assert!(!modified);

let (result, modified) = sanitize_filename("文件.txt");
assert_eq!(result, "文件.txt");
assert!(!modified);

let (result, modified) = sanitize_filename("emoji😀.txt");
assert_eq!(result, "emoji😀.txt");
assert!(!modified);
}

#[test]
fn test_sanitize_mixed_valid_invalid() {
let (result, modified) =
sanitize_filename("Red vs. Blue - Season 1/Episode 1: Why Are We Here?.mp4");
assert_eq!(
result,
"Red vs. Blue - Season 1/Episode 1_ Why Are We Here_.mp4"
);
assert!(modified);
}

#[test]
fn test_sanitize_preserves_extension() {
let (result, modified) = sanitize_filename("file:name.tar.gz");
assert_eq!(result, "file_name.tar.gz");
assert!(modified);
}
}
Loading