@@ -163,3 +163,327 @@ pub fn format_transfer_rate(bytes_per_sec: f64) -> (f64, &'static str) {
163163 ( bytes_per_sec / GB , "GB" )
164164 }
165165}
166+
/// Sanitizes a filename for cross-platform filesystem compatibility.
///
/// The input is treated as a `/`-separated relative path. Each path component
/// is cleaned independently so the directory structure is preserved.
///
/// Per component:
/// - `< > : " | ? *`, backslash, control characters (0-31) and DEL (127) are
///   each replaced with an underscore.
/// - Leading whitespace is removed.
/// - Any trailing run of whitespace and dots is removed (in any mix, e.g.
///   `"name. ."` becomes `"name"`), since Windows rejects names ending in
///   either.
/// - A component that ends up empty (for example `"."`, `".."` or `"   "`)
///   becomes `"_"`.
/// - Windows reserved device names (CON, PRN, AUX, NUL, COM1-9, LPT1-9),
///   matched case-insensitively against the text before the first dot, get an
///   underscore appended to that base name (`"CON.txt"` -> `"CON_.txt"`).
///
/// Empty components (from leading/trailing slashes or `//`) are dropped, so
/// the result never starts or ends with `/`.
///
/// # Arguments
/// * `filename` - The original filename (may include path components separated by `/`)
///
/// # Returns
/// * `(sanitized_filename, was_modified)` - The cleaned filename and whether it
///   differs from the input.
///
/// # Examples
/// ```
/// use ia_get::utils::sanitize_filename;
///
/// let (sanitized, modified) = sanitize_filename("normal_file.txt");
/// assert_eq!(sanitized, "normal_file.txt");
/// assert!(!modified);
///
/// let (sanitized, modified) = sanitize_filename("file?name.txt");
/// assert_eq!(sanitized, "file_name.txt");
/// assert!(modified);
///
/// let (sanitized, modified) = sanitize_filename("Season 1/Episode?.mp4");
/// assert_eq!(sanitized, "Season 1/Episode_.mp4");
/// assert!(modified);
/// ```
pub fn sanitize_filename(filename: &str) -> (String, bool) {
    // Windows reserved device names (compared case-insensitively).
    const RESERVED_NAMES: &[&str] = &[
        "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8",
        "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
    ];

    let mut result = String::with_capacity(filename.len());

    // Empty components (leading/trailing slashes, "//") are simply dropped.
    for component in filename.split('/').filter(|part| !part.is_empty()) {
        // Every emitted component is non-empty, so a non-empty `result` means
        // at least one component precedes this one and a separator is needed.
        if !result.is_empty() {
            result.push('/');
        }

        // Replace every character that is invalid on Windows or Unix.
        let replaced: String = component
            .chars()
            .map(|ch| match ch {
                '<' | '>' | ':' | '"' | '|' | '?' | '*' | '\\' | '\x00'..='\x1F' | '\x7F' => '_',
                valid => valid,
            })
            .collect();

        // Strip leading whitespace, then the whole trailing run of whitespace
        // and dots at once. Trimming them in two separate passes would leave a
        // trailing space behind for inputs such as "file .".
        let trimmed = replaced
            .trim_start()
            .trim_end_matches(|c: char| c == '.' || c.is_whitespace());

        // A component must never vanish, otherwise the path depth would change.
        let mut sanitized_component = if trimmed.is_empty() {
            String::from("_")
        } else {
            trimmed.to_owned()
        };

        // The reserved-name check applies to the base name: everything before
        // the first dot, or the whole component when there is no dot.
        let dot_pos = sanitized_component.find('.');
        let base_name = dot_pos.map_or(sanitized_component.as_str(), |pos| {
            &sanitized_component[..pos]
        });
        let is_reserved = RESERVED_NAMES
            .iter()
            .any(|&reserved| base_name.eq_ignore_ascii_case(reserved));

        if is_reserved {
            // Put the underscore right after the base name, before any extension.
            match dot_pos {
                Some(pos) => sanitized_component.insert(pos, '_'),
                None => sanitized_component.push('_'),
            }
        }

        result.push_str(&sanitized_component);
    }

    // Every transformation above changes the text, so a plain comparison is an
    // exact "was modified" signal.
    let was_modified = result != filename;
    (result, was_modified)
}
314+
/// Unit tests for `sanitize_filename`.
///
/// Each test feeds one category of problematic input and checks both the
/// sanitized text and the `was_modified` flag.
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain, already-valid name passes through untouched.
    #[test]
    fn test_sanitize_valid_filename() {
        let (result, modified) = sanitize_filename("normal_file-name.txt");
        assert_eq!(result, "normal_file-name.txt");
        assert!(!modified);
    }

    /// A valid multi-component path keeps its `/` separators.
    #[test]
    fn test_sanitize_valid_filename_with_path() {
        let (result, modified) = sanitize_filename("folder/subfolder/file.txt");
        assert_eq!(result, "folder/subfolder/file.txt");
        assert!(!modified);
    }

    /// Each Windows-invalid character becomes exactly one underscore.
    #[test]
    fn test_sanitize_invalid_characters() {
        let (result, modified) = sanitize_filename("file?name:test<>.txt");
        assert_eq!(result, "file_name_test__.txt");
        assert!(modified);
    }

    #[test]
    fn test_sanitize_question_mark() {
        let (result, modified) = sanitize_filename("Episode?.mp4");
        assert_eq!(result, "Episode_.mp4");
        assert!(modified);
    }

    /// Interior spaces are legal and must be preserved.
    #[test]
    fn test_sanitize_with_path() {
        let (result, modified) = sanitize_filename("Season 1/Episode?.mp4");
        assert_eq!(result, "Season 1/Episode_.mp4");
        assert!(modified);
    }

    #[test]
    fn test_sanitize_multiple_invalid_in_path() {
        let (result, modified) = sanitize_filename("Folder:Name/File*Name?.txt");
        assert_eq!(result, "Folder_Name/File_Name_.txt");
        assert!(modified);
    }

    /// Reserved device names get an underscore after the base name,
    /// regardless of case and with or without an extension.
    #[test]
    fn test_sanitize_windows_reserved_names() {
        let (result, modified) = sanitize_filename("CON.txt");
        assert_eq!(result, "CON_.txt");
        assert!(modified);

        let (result, modified) = sanitize_filename("con.txt");
        assert_eq!(result, "con_.txt");
        assert!(modified);

        let (result, modified) = sanitize_filename("PRN");
        assert_eq!(result, "PRN_");
        assert!(modified);

        let (result, modified) = sanitize_filename("aux.log");
        assert_eq!(result, "aux_.log");
        assert!(modified);

        let (result, modified) = sanitize_filename("COM1.dat");
        assert_eq!(result, "COM1_.dat");
        assert!(modified);

        let (result, modified) = sanitize_filename("LPT9.txt");
        assert_eq!(result, "LPT9_.txt");
        assert!(modified);
    }

    #[test]
    fn test_sanitize_reserved_in_path() {
        let (result, modified) = sanitize_filename("folder/CON.txt");
        assert_eq!(result, "folder/CON_.txt");
        assert!(modified);
    }

    /// Control characters (0-31) and DEL (127) are replaced one-for-one.
    /// The inputs must not contain spaces: interior spaces are preserved by
    /// the sanitizer and would make the expected values below wrong.
    #[test]
    fn test_sanitize_control_characters() {
        let (result, modified) = sanitize_filename("file\x00\x1fname.txt");
        assert_eq!(result, "file__name.txt");
        assert!(modified);

        let (result, modified) = sanitize_filename("test\x7Ffile.txt");
        assert_eq!(result, "test_file.txt");
        assert!(modified);
    }

    /// A backslash is not treated as a separator; it is replaced in place.
    #[test]
    fn test_sanitize_backslash() {
        let (result, modified) = sanitize_filename("folder\\file.txt");
        assert_eq!(result, "folder_file.txt");
        assert!(modified);
    }

    /// Leading/trailing spaces are trimmed per component.
    #[test]
    fn test_sanitize_whitespace_edge_cases() {
        let (result, modified) = sanitize_filename(" leading.txt ");
        assert_eq!(result, "leading.txt");
        assert!(modified);

        let (result, modified) = sanitize_filename("folder/ spaces /file.txt");
        assert_eq!(result, "folder/spaces/file.txt");
        assert!(modified);
    }

    /// Trailing dots are stripped from every component, not just the last.
    #[test]
    fn test_sanitize_trailing_dots() {
        let (result, modified) = sanitize_filename("file...");
        assert_eq!(result, "file");
        assert!(modified);

        let (result, modified) = sanitize_filename("folder./file.txt");
        assert_eq!(result, "folder/file.txt");
        assert!(modified);
    }

    /// Empty components from `//`, a leading `/`, or a trailing `/` are dropped.
    #[test]
    fn test_sanitize_empty_components() {
        let (result, modified) = sanitize_filename("folder//file.txt");
        assert_eq!(result, "folder/file.txt");
        assert!(modified);

        let (result, modified) = sanitize_filename("/folder/file.txt");
        assert_eq!(result, "folder/file.txt");
        assert!(modified);

        let (result, modified) = sanitize_filename("folder/file.txt/");
        assert_eq!(result, "folder/file.txt");
        assert!(modified);
    }

    #[test]
    fn test_sanitize_all_invalid() {
        let (result, modified) = sanitize_filename("???");
        assert_eq!(result, "___");
        assert!(modified);
    }

    /// Non-ASCII text (Cyrillic, CJK, emoji) is valid and left alone.
    #[test]
    fn test_sanitize_unicode() {
        let (result, modified) = sanitize_filename("файл.txt");
        assert_eq!(result, "файл.txt");
        assert!(!modified);

        let (result, modified) = sanitize_filename("文件.txt");
        assert_eq!(result, "文件.txt");
        assert!(!modified);

        let (result, modified) = sanitize_filename("emoji😀.txt");
        assert_eq!(result, "emoji😀.txt");
        assert!(!modified);
    }

    /// Realistic title mixing legal punctuation with invalid `:` and `?`.
    #[test]
    fn test_sanitize_mixed_valid_invalid() {
        let (result, modified) =
            sanitize_filename("Red vs. Blue - Season 1/Episode 1: Why Are We Here?.mp4");
        assert_eq!(
            result,
            "Red vs. Blue - Season 1/Episode 1_ Why Are We Here_.mp4"
        );
        assert!(modified);
    }

    /// Multi-part extensions survive sanitization of the stem.
    #[test]
    fn test_sanitize_preserves_extension() {
        let (result, modified) = sanitize_filename("file:name.tar.gz");
        assert_eq!(result, "file_name.tar.gz");
        assert!(modified);
    }
}
0 commit comments