Skip to content

Commit 57f5452

Browse files
committed
sort: --files0-from should not panic on file names that are not valid UTF-8
1 parent 2000af8 commit 57f5452

File tree

2 files changed

+55
-16
lines changed

2 files changed

+55
-16
lines changed

src/uu/sort/src/sort.rs

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1291,26 +1291,34 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
12911291
let reader = open_with_open_failed_error(&files0_from)?;
12921292
let buf_reader = BufReader::new(reader);
12931293
for (line_num, line) in buf_reader.split(b'\0').flatten().enumerate() {
1294-
let f = std::str::from_utf8(&line)
1295-
.expect("Could not parse string from zero terminated input.");
1296-
match f {
1297-
STDIN_FILE => {
1298-
return Err(SortError::MinusInStdIn.into());
1294+
// Handle filenames as raw bytes to support non-UTF-8 paths
1295+
#[cfg(unix)]
1296+
let filename = {
1297+
use std::os::unix::ffi::OsStrExt;
1298+
OsStr::from_bytes(&line).to_owned()
1299+
};
1300+
#[cfg(not(unix))]
1301+
let filename = {
1302+
// On non-Unix systems, convert to UTF-8 with replacement chars
1303+
match std::str::from_utf8(&line) {
1304+
Ok(s) => OsString::from(s),
1305+
Err(_) => OsString::from(String::from_utf8_lossy(&line).into_owned()),
12991306
}
1300-
"" => {
1301-
return Err(SortError::ZeroLengthFileName {
1302-
file: files0_from,
1303-
line_num: line_num + 1,
1304-
}
1305-
.into());
1307+
};
1308+
1309+
// Check for special cases using bytes comparison
1310+
if line == b"-" {
1311+
return Err(SortError::MinusInStdIn.into());
1312+
}
1313+
if line.is_empty() {
1314+
return Err(SortError::ZeroLengthFileName {
1315+
file: files0_from,
1316+
line_num: line_num + 1,
13061317
}
1307-
_ => {}
1318+
.into());
13081319
}
13091320

1310-
files.push(OsString::from(
1311-
std::str::from_utf8(&line)
1312-
.expect("Could not parse string from zero terminated input."),
1313-
));
1321+
files.push(filename);
13141322
}
13151323
if files.is_empty() {
13161324
return Err(SortError::EmptyInputFile { file: files0_from }.into());

tests/by-util/test_sort.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1544,6 +1544,37 @@ fn test_files0_from_zero_length() {
15441544
.stderr_only("sort: -:2: invalid zero-length file name\n");
15451545
}
15461546

1547+
#[test]
1548+
#[cfg(unix)]
1549+
fn test_files0_from_non_utf8_filename() {
1550+
use std::ffi::OsStr;
1551+
use std::os::unix::ffi::OsStrExt;
1552+
use std::path::Path;
1553+
let (at, mut ucmd) = at_and_ucmd!();
1554+
1555+
at.touch("file1.txt");
1556+
at.append("file1.txt", "zebra\n");
1557+
at.touch("file2.txt");
1558+
at.append("file2.txt", "apple\n");
1559+
1560+
// Create a file with non-UTF-8 bytes in filename
1561+
let non_utf8_name = OsStr::from_bytes(b"\xff\xfefile3.txt");
1562+
let non_utf8_path = at.plus_as_string("").to_owned() + "/";
1563+
let full_path = Path::new(&non_utf8_path).join(non_utf8_name);
1564+
std::fs::write(&full_path, "banana\n").unwrap();
1565+
1566+
// Create files0-from input containing the non-UTF-8 filename bytes
1567+
let mut files0_input = Vec::new();
1568+
files0_input.extend_from_slice(b"file1.txt\0");
1569+
files0_input.extend_from_slice(b"\xff\xfefile3.txt\0");
1570+
files0_input.extend_from_slice(b"file2.txt\0");
1571+
1572+
ucmd.args(&["--files0-from", "-"])
1573+
.pipe_in(files0_input)
1574+
.succeeds()
1575+
.stdout_is("apple\nbanana\nzebra\n");
1576+
}
1577+
15471578
#[test]
15481579
// Test for GNU tests/sort/sort-float.sh
15491580
fn test_g_float() {

0 commit comments

Comments
 (0)