diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs
index 071163c5aee..cb1ff5d8e6e 100644
--- a/src/uu/sort/src/sort.rs
+++ b/src/uu/sort/src/sort.rs
@@ -1310,26 +1310,32 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
         let reader = open_with_open_failed_error(&files0_from)?;
         let buf_reader = BufReader::new(reader);
         for (line_num, line) in buf_reader.split(b'\0').flatten().enumerate() {
-            let f = std::str::from_utf8(&line)
-                .expect("Could not parse string from zero terminated input.");
-            match f {
-                STDIN_FILE => {
-                    return Err(SortError::MinusInStdIn.into());
-                }
-                "" => {
-                    return Err(SortError::ZeroLengthFileName {
-                        file: files0_from,
-                        line_num: line_num + 1,
-                    }
-                    .into());
-                }
-                _ => {}
-            }
+            // Validate on the raw bytes first: the special names are plain
+            // ASCII, so no decoding (and no allocation) is needed to reject them.
+            if line == b"-" {
+                return Err(SortError::MinusInStdIn.into());
+            }
+            if line.is_empty() {
+                return Err(SortError::ZeroLengthFileName {
+                    file: files0_from,
+                    line_num: line_num + 1,
+                }
+                .into());
+            }
+
+            // On Unix a path is an arbitrary byte string, so take ownership of
+            // the buffer as-is; non-UTF-8 names are preserved without copying.
+            #[cfg(unix)]
+            let filename = {
+                use std::os::unix::ffi::OsStringExt;
+                OsString::from_vec(line)
+            };
+            // Elsewhere an `OsString` cannot be built from arbitrary bytes, so
+            // invalid UTF-8 sequences are replaced with U+FFFD.
+            #[cfg(not(unix))]
+            let filename = OsString::from(String::from_utf8_lossy(&line).into_owned());
 
-            files.push(OsString::from(
-                std::str::from_utf8(&line)
-                    .expect("Could not parse string from zero terminated input."),
-            ));
+            files.push(filename);
         }
         if files.is_empty() {
             return Err(SortError::EmptyInputFile { file: files0_from }.into());
diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs
index 6330f759df0..756b3c27480 100644
--- a/tests/by-util/test_sort.rs
+++ b/tests/by-util/test_sort.rs
@@ -1547,6 +1547,36 @@ fn test_files0_from_zero_length() {
         .stderr_only("sort: -:2: invalid zero-length file name\n");
 }
 
+#[test]
+#[cfg(target_os = "linux")]
+fn test_files0_from_non_utf8_filename() {
+    use std::ffi::OsStr;
+    use std::os::unix::ffi::OsStrExt;
+    let (at, mut ucmd) = at_and_ucmd!();
+
+    at.touch("file1.txt");
+    at.append("file1.txt", "zebra\n");
+    at.touch("file2.txt");
+    at.append("file2.txt", "apple\n");
+
+    // Create a file whose name contains bytes that are not valid UTF-8
+    // spell-checker:ignore fffile
+    let non_utf8_name = OsStr::from_bytes(b"\xff\xff_file3.txt");
+    let full_path = at.plus(non_utf8_name);
+    std::fs::write(&full_path, "banana\n").unwrap();
+
+    // Create files0-from input containing the non-UTF-8 filename bytes
+    let mut files0_input = Vec::new();
+    files0_input.extend_from_slice(b"file1.txt\0");
+    files0_input.extend_from_slice(b"\xff\xff_file3.txt\0");
+    files0_input.extend_from_slice(b"file2.txt\0");
+
+    ucmd.args(&["--files0-from", "-"])
+        .pipe_in(files0_input)
+        .succeeds()
+        .stdout_is("apple\nbanana\nzebra\n");
+}
+
 #[test]
 // Test for GNU tests/sort/sort-float.sh
 fn test_g_float() {