diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt index 9fa0b625aac..6659251e994 100644 --- a/.vscode/cspell.dictionaries/jargon.wordlist.txt +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -199,6 +199,7 @@ nofield # * clippy uninlined nonminimal +rposition # * CPU/hardware features ASIMD diff --git a/src/uu/dirname/src/dirname.rs b/src/uu/dirname/src/dirname.rs index 3399b4a031c..8659465cdb0 100644 --- a/src/uu/dirname/src/dirname.rs +++ b/src/uu/dirname/src/dirname.rs @@ -4,8 +4,9 @@ // file that was distributed with this source code. use clap::{Arg, ArgAction, Command}; +use std::borrow::Cow; use std::ffi::OsString; -use std::path::Path; +#[cfg(unix)] use uucore::display::print_verbatim; use uucore::error::{UResult, UUsageError}; use uucore::format_usage; @@ -18,51 +19,84 @@ mod options { pub const DIR: &str = "dir"; } -/// Handle the special case where a path ends with "/." +/// Perform dirname as pure string manipulation per POSIX/GNU behavior. +/// +/// dirname should NOT normalize paths. It does simple string manipulation: +/// 1. Strip trailing slashes (unless path is all slashes) +/// 2. If ends with `/.` (possibly `//.` or `///.`), strip the `/+.` pattern +/// 3. Otherwise, remove everything after the last `/` +/// 4. If no `/` found, return `.` +/// 5. Strip trailing slashes from result (unless result would be empty) +/// +/// Examples: +/// - `foo/.` → `foo` +/// - `foo/./bar` → `foo/.` +/// - `foo/bar` → `foo` +/// - `a/b/c` → `a/b` /// -/// This matches GNU/POSIX behavior where `dirname("/home/dos/.")` returns "/home/dos" -/// rather than "/home" (which would be the result of `Path::parent()` due to normalization). /// Per POSIX.1-2017 dirname specification and GNU coreutils manual: /// - POSIX: /// - GNU: /// -/// dirname should do simple string manipulation without path normalization. /// See issue #8910 and similar fix in basename (#8373, commit c5268a897). -/// -/// Returns `Some(())` if the special case was handled (output already printed), -/// or `None` if normal `Path::parent()` logic should be used. -fn handle_trailing_dot(path_bytes: &[u8]) -> Option<()> { - if !path_bytes.ends_with(b"/.") { - return None; +fn dirname_string_manipulation(path_bytes: &[u8]) -> Cow<'_, [u8]> { + if path_bytes.is_empty() { + return Cow::Borrowed(b"."); } - // Strip the "/." suffix and print the result - if path_bytes.len() == 2 { - // Special case: "/." -> "/" - print!("/"); - Some(()) - } else { - // General case: "/home/dos/." -> "/home/dos" - let stripped = &path_bytes[..path_bytes.len() - 2]; - #[cfg(unix)] - { - use std::os::unix::ffi::OsStrExt; - let result = std::ffi::OsStr::from_bytes(stripped); - print_verbatim(result).unwrap(); - Some(()) - } - #[cfg(not(unix))] - { - // On non-Unix, fall back to lossy conversion - if let Ok(s) = std::str::from_utf8(stripped) { - print!("{s}"); - Some(()) - } else { - // Can't handle non-UTF-8 on non-Unix, fall through to normal logic - None + let mut bytes = path_bytes; + + // Step 1: Strip trailing slashes (but not if the entire path is slashes) + let all_slashes = bytes.iter().all(|&b| b == b'/'); + if all_slashes { + return Cow::Borrowed(b"/"); + } + + while bytes.len() > 1 && bytes.ends_with(b"/") { + bytes = &bytes[..bytes.len() - 1]; + } + + // Step 2: Check if it ends with `/.` and strip the `/+.` pattern + if bytes.ends_with(b".") && bytes.len() >= 2 { + let dot_pos = bytes.len() - 1; + if bytes[dot_pos - 1] == b'/' { + // Find where the slashes before the dot start + let mut slash_start = dot_pos - 1; + while slash_start > 0 && bytes[slash_start - 1] == b'/' { + slash_start -= 1; + } + // Return the stripped result + if slash_start == 0 { + // Result would be empty + return if path_bytes.starts_with(b"/") { + Cow::Borrowed(b"/") + } else { + Cow::Borrowed(b".") + }; } + return Cow::Owned(bytes[..slash_start].to_vec()); } } + + // Step 3: Normal dirname - find last / and remove everything after it + if let Some(last_slash_pos) = bytes.iter().rposition(|&b| b == b'/') { + // Found a slash, remove everything after it + let mut result = &bytes[..last_slash_pos]; + + // Strip trailing slashes from result (but keep at least one if at the start) + while result.len() > 1 && result.ends_with(b"/") { + result = &result[..result.len() - 1]; + } + + if result.is_empty() { + return Cow::Borrowed(b"/"); + } + + return Cow::Owned(result.to_vec()); + } + + // No slash found, return "." + Cow::Borrowed(b".") } #[uucore::main] @@ -83,27 +117,25 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { for path in &dirnames { let path_bytes = uucore::os_str_as_bytes(path.as_os_str()).unwrap_or(&[]); + let result = dirname_string_manipulation(path_bytes); - if handle_trailing_dot(path_bytes).is_none() { - // Normal path handling using Path::parent() - let p = Path::new(path); - match p.parent() { - Some(d) => { - if d.components().next().is_none() { - print!("."); - } else { - print_verbatim(d).unwrap(); - } - } - None => { - if p.is_absolute() || path.as_os_str() == "/" { - print!("/"); - } else { - print!("."); - } - } + #[cfg(unix)] + { + use std::os::unix::ffi::OsStrExt; + let result_os = std::ffi::OsStr::from_bytes(&result); + print_verbatim(result_os).unwrap(); + } + #[cfg(not(unix))] + { + // On non-Unix, fall back to lossy conversion + if let Ok(s) = std::str::from_utf8(&result) { + print!("{s}"); + } else { + // Fallback for non-UTF-8 paths on non-Unix systems + print!("."); } } + print!("{line_ending}"); } diff --git a/tests/by-util/test_dirname.rs b/tests/by-util/test_dirname.rs index c7cdf3a4633..bd6994107ec 100644 --- a/tests/by-util/test_dirname.rs +++ b/tests/by-util/test_dirname.rs @@ -9,6 +9,11 @@ fn test_invalid_arg() { new_ucmd!().arg("--definitely-invalid").fails_with_code(1); } +#[test] +fn test_missing_operand() { + new_ucmd!().fails_with_code(1); +} + #[test] fn test_path_with_trailing_slashes() { new_ucmd!() @@ -71,15 +76,11 @@ fn test_dirname_non_utf8_paths() { use std::ffi::OsStr; use std::os::unix::ffi::OsStrExt; - // Create a test file with non-UTF-8 bytes in the name let non_utf8_bytes = b"test_\xFF\xFE/file.txt"; let non_utf8_name = OsStr::from_bytes(non_utf8_bytes); - // Test that dirname handles non-UTF-8 paths without crashing let result = new_ucmd!().arg(non_utf8_name).succeeds(); - // Just verify it didn't crash and produced some output - // The exact output format may vary due to lossy conversion let output = result.stdout_str_lossy(); assert!(!output.is_empty()); assert!(output.contains("test_")); @@ -156,7 +157,7 @@ fn test_trailing_dot_edge_cases() { new_ucmd!() .arg("/home/dos//.") .succeeds() - .stdout_is("/home/dos/\n"); + .stdout_is("/home/dos\n"); // Path with . in middle (should use normal logic) new_ucmd!() @@ -216,3 +217,62 @@ fn test_existing_behavior_preserved() { .succeeds() .stdout_is("/home/dos\n"); } + +#[test] +fn test_multiple_paths_comprehensive() { + // Comprehensive test for multiple paths in single invocation + new_ucmd!() + .args(&[ + "/home/dos/.", + "/var/log", + ".", + "/tmp/.", + "", + "/", + "relative/path", + ]) + .succeeds() + .stdout_is("/home/dos\n/var\n.\n/tmp\n.\n/\nrelative\n"); +} + +#[test] +fn test_all_dot_slash_variations() { + // Tests for all the cases mentioned in issue #8910 comment + // https://github.com/uutils/coreutils/issues/8910#issuecomment-3408735720 + + new_ucmd!().arg("foo//.").succeeds().stdout_is("foo\n"); + + new_ucmd!().arg("foo///.").succeeds().stdout_is("foo\n"); + + new_ucmd!().arg("foo/./").succeeds().stdout_is("foo\n"); + + new_ucmd!() + .arg("foo/bar/./") + .succeeds() + .stdout_is("foo/bar\n"); + + new_ucmd!().arg("foo/./bar").succeeds().stdout_is("foo/.\n"); +} + +#[test] +fn test_dot_slash_component_preservation() { + // Ensure that /. components in the middle are preserved + // These should NOT be normalized away + + new_ucmd!().arg("a/./b").succeeds().stdout_is("a/.\n"); + + new_ucmd!() + .arg("a/./b/./c") + .succeeds() + .stdout_is("a/./b/.\n"); + + new_ucmd!() + .arg("foo/./bar/baz") + .succeeds() + .stdout_is("foo/./bar\n"); + + new_ucmd!() + .arg("/path/./to/file") + .succeeds() + .stdout_is("/path/./to\n"); +}