Skip to content

Commit b3ad96f

Browse files
authored
refactor(dirname): implement pure string manipulation per POSIX (#8936)
1 parent 571e139 commit b3ad96f

File tree

3 files changed

+151
-58
lines changed

3 files changed

+151
-58
lines changed

.vscode/cspell.dictionaries/jargon.wordlist.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ nofield
202202
# * clippy
203203
uninlined
204204
nonminimal
205+
rposition
205206

206207
# * CPU/hardware features
207208
ASIMD

src/uu/dirname/src/dirname.rs

Lines changed: 85 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
// file that was distributed with this source code.
55

66
use clap::{Arg, ArgAction, Command};
7+
use std::borrow::Cow;
78
use std::ffi::OsString;
8-
use std::path::Path;
9+
#[cfg(unix)]
910
use uucore::display::print_verbatim;
1011
use uucore::error::{UResult, UUsageError};
1112
use uucore::format_usage;
@@ -18,51 +19,84 @@ mod options {
1819
pub const DIR: &str = "dir";
1920
}
2021

21-
/// Handle the special case where a path ends with "/."
22+
/// Perform dirname as pure string manipulation per POSIX/GNU behavior.
23+
///
24+
/// dirname should NOT normalize paths. It does simple string manipulation:
25+
/// 1. Strip trailing slashes (unless path is all slashes)
26+
/// 2. If ends with `/.` (possibly `//.` or `///.`), strip the `/+.` pattern
27+
/// 3. Otherwise, remove everything after the last `/`
28+
/// 4. If no `/` found, return `.`
29+
/// 5. Strip trailing slashes from result (unless result would be empty)
30+
///
31+
/// Examples:
32+
/// - `foo/.` → `foo`
33+
/// - `foo/./bar` → `foo/.`
34+
/// - `foo/bar` → `foo`
35+
/// - `a/b/c` → `a/b`
2236
///
23-
/// This matches GNU/POSIX behavior where `dirname("/home/dos/.")` returns "/home/dos"
24-
/// rather than "/home" (which would be the result of `Path::parent()` due to normalization).
2537
/// Per POSIX.1-2017 dirname specification and GNU coreutils manual:
2638
/// - POSIX: <https://pubs.opengroup.org/onlinepubs/9699919799/utilities/dirname.html>
2739
/// - GNU: <https://www.gnu.org/software/coreutils/manual/html_node/dirname-invocation.html>
2840
///
29-
/// dirname should do simple string manipulation without path normalization.
3041
/// See issue #8910 and similar fix in basename (#8373, commit c5268a897).
31-
///
32-
/// Returns `Some(())` if the special case was handled (output already printed),
33-
/// or `None` if normal `Path::parent()` logic should be used.
34-
fn handle_trailing_dot(path_bytes: &[u8]) -> Option<()> {
35-
if !path_bytes.ends_with(b"/.") {
36-
return None;
42+
fn dirname_string_manipulation(path_bytes: &[u8]) -> Cow<'_, [u8]> {
43+
if path_bytes.is_empty() {
44+
return Cow::Borrowed(b".");
3745
}
3846

39-
// Strip the "/." suffix and print the result
40-
if path_bytes.len() == 2 {
41-
// Special case: "/." -> "/"
42-
print!("/");
43-
Some(())
44-
} else {
45-
// General case: "/home/dos/." -> "/home/dos"
46-
let stripped = &path_bytes[..path_bytes.len() - 2];
47-
#[cfg(unix)]
48-
{
49-
use std::os::unix::ffi::OsStrExt;
50-
let result = std::ffi::OsStr::from_bytes(stripped);
51-
print_verbatim(result).unwrap();
52-
Some(())
53-
}
54-
#[cfg(not(unix))]
55-
{
56-
// On non-Unix, fall back to lossy conversion
57-
if let Ok(s) = std::str::from_utf8(stripped) {
58-
print!("{s}");
59-
Some(())
60-
} else {
61-
// Can't handle non-UTF-8 on non-Unix, fall through to normal logic
62-
None
47+
let mut bytes = path_bytes;
48+
49+
// Step 1: Strip trailing slashes (but not if the entire path is slashes)
50+
let all_slashes = bytes.iter().all(|&b| b == b'/');
51+
if all_slashes {
52+
return Cow::Borrowed(b"/");
53+
}
54+
55+
while bytes.len() > 1 && bytes.ends_with(b"/") {
56+
bytes = &bytes[..bytes.len() - 1];
57+
}
58+
59+
// Step 2: Check if it ends with `/.` and strip the `/+.` pattern
60+
if bytes.ends_with(b".") && bytes.len() >= 2 {
61+
let dot_pos = bytes.len() - 1;
62+
if bytes[dot_pos - 1] == b'/' {
63+
// Find where the slashes before the dot start
64+
let mut slash_start = dot_pos - 1;
65+
while slash_start > 0 && bytes[slash_start - 1] == b'/' {
66+
slash_start -= 1;
67+
}
68+
// Return the stripped result
69+
if slash_start == 0 {
70+
// Result would be empty
71+
return if path_bytes.starts_with(b"/") {
72+
Cow::Borrowed(b"/")
73+
} else {
74+
Cow::Borrowed(b".")
75+
};
6376
}
77+
return Cow::Owned(bytes[..slash_start].to_vec());
6478
}
6579
}
80+
81+
// Step 3: Normal dirname - find last / and remove everything after it
82+
if let Some(last_slash_pos) = bytes.iter().rposition(|&b| b == b'/') {
83+
// Found a slash, remove everything after it
84+
let mut result = &bytes[..last_slash_pos];
85+
86+
// Strip trailing slashes from result (but keep at least one if at the start)
87+
while result.len() > 1 && result.ends_with(b"/") {
88+
result = &result[..result.len() - 1];
89+
}
90+
91+
if result.is_empty() {
92+
return Cow::Borrowed(b"/");
93+
}
94+
95+
return Cow::Owned(result.to_vec());
96+
}
97+
98+
// No slash found, return "."
99+
Cow::Borrowed(b".")
66100
}
67101

68102
#[uucore::main]
@@ -83,27 +117,25 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
83117

84118
for path in &dirnames {
85119
let path_bytes = uucore::os_str_as_bytes(path.as_os_str()).unwrap_or(&[]);
120+
let result = dirname_string_manipulation(path_bytes);
86121

87-
if handle_trailing_dot(path_bytes).is_none() {
88-
// Normal path handling using Path::parent()
89-
let p = Path::new(path);
90-
match p.parent() {
91-
Some(d) => {
92-
if d.components().next().is_none() {
93-
print!(".");
94-
} else {
95-
print_verbatim(d).unwrap();
96-
}
97-
}
98-
None => {
99-
if p.is_absolute() || path.as_os_str() == "/" {
100-
print!("/");
101-
} else {
102-
print!(".");
103-
}
104-
}
122+
#[cfg(unix)]
123+
{
124+
use std::os::unix::ffi::OsStrExt;
125+
let result_os = std::ffi::OsStr::from_bytes(&result);
126+
print_verbatim(result_os).unwrap();
127+
}
128+
#[cfg(not(unix))]
129+
{
130+
// On non-Unix, fall back to lossy conversion
131+
if let Ok(s) = std::str::from_utf8(&result) {
132+
print!("{s}");
133+
} else {
134+
// Fallback for non-UTF-8 paths on non-Unix systems
135+
print!(".");
105136
}
106137
}
138+
107139
print!("{line_ending}");
108140
}
109141

tests/by-util/test_dirname.rs

Lines changed: 65 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ fn test_invalid_arg() {
99
new_ucmd!().arg("--definitely-invalid").fails_with_code(1);
1010
}
1111

12+
#[test]
13+
fn test_missing_operand() {
14+
new_ucmd!().fails_with_code(1);
15+
}
16+
1217
#[test]
1318
fn test_path_with_trailing_slashes() {
1419
new_ucmd!()
@@ -71,15 +76,11 @@ fn test_dirname_non_utf8_paths() {
7176
use std::ffi::OsStr;
7277
use std::os::unix::ffi::OsStrExt;
7378

74-
// Create a test file with non-UTF-8 bytes in the name
7579
let non_utf8_bytes = b"test_\xFF\xFE/file.txt";
7680
let non_utf8_name = OsStr::from_bytes(non_utf8_bytes);
7781

78-
// Test that dirname handles non-UTF-8 paths without crashing
7982
let result = new_ucmd!().arg(non_utf8_name).succeeds();
8083

81-
// Just verify it didn't crash and produced some output
82-
// The exact output format may vary due to lossy conversion
8384
let output = result.stdout_str_lossy();
8485
assert!(!output.is_empty());
8586
assert!(output.contains("test_"));
@@ -156,7 +157,7 @@ fn test_trailing_dot_edge_cases() {
156157
new_ucmd!()
157158
.arg("/home/dos//.")
158159
.succeeds()
159-
.stdout_is("/home/dos/\n");
160+
.stdout_is("/home/dos\n");
160161

161162
// Path with . in middle (should use normal logic)
162163
new_ucmd!()
@@ -216,3 +217,62 @@ fn test_existing_behavior_preserved() {
216217
.succeeds()
217218
.stdout_is("/home/dos\n");
218219
}
220+
221+
#[test]
222+
fn test_multiple_paths_comprehensive() {
223+
// Comprehensive test for multiple paths in single invocation
224+
new_ucmd!()
225+
.args(&[
226+
"/home/dos/.",
227+
"/var/log",
228+
".",
229+
"/tmp/.",
230+
"",
231+
"/",
232+
"relative/path",
233+
])
234+
.succeeds()
235+
.stdout_is("/home/dos\n/var\n.\n/tmp\n.\n/\nrelative\n");
236+
}
237+
238+
#[test]
239+
fn test_all_dot_slash_variations() {
240+
// Tests for all the cases mentioned in issue #8910 comment
241+
// https://github.com/uutils/coreutils/issues/8910#issuecomment-3408735720
242+
243+
new_ucmd!().arg("foo//.").succeeds().stdout_is("foo\n");
244+
245+
new_ucmd!().arg("foo///.").succeeds().stdout_is("foo\n");
246+
247+
new_ucmd!().arg("foo/./").succeeds().stdout_is("foo\n");
248+
249+
new_ucmd!()
250+
.arg("foo/bar/./")
251+
.succeeds()
252+
.stdout_is("foo/bar\n");
253+
254+
new_ucmd!().arg("foo/./bar").succeeds().stdout_is("foo/.\n");
255+
}
256+
257+
#[test]
258+
fn test_dot_slash_component_preservation() {
259+
// Ensure that /. components in the middle are preserved
260+
// These should NOT be normalized away
261+
262+
new_ucmd!().arg("a/./b").succeeds().stdout_is("a/.\n");
263+
264+
new_ucmd!()
265+
.arg("a/./b/./c")
266+
.succeeds()
267+
.stdout_is("a/./b/.\n");
268+
269+
new_ucmd!()
270+
.arg("foo/./bar/baz")
271+
.succeeds()
272+
.stdout_is("foo/./bar\n");
273+
274+
new_ucmd!()
275+
.arg("/path/./to/file")
276+
.succeeds()
277+
.stdout_is("/path/./to\n");
278+
}

0 commit comments

Comments
 (0)