/// Emits a single output record as raw bytes.
///
/// The record consists of `prefix`, followed by `line`, followed by one
/// `\n` byte. Both slices are forwarded to `writer` untouched, so byte
/// sequences that are not valid UTF-8 reach the output unchanged.
///
/// # Errors
///
/// Returns the first I/O error reported by `writer`; nothing further is
/// written after a failure.
fn write_line(writer: &mut impl Write, prefix: &[u8], line: &[u8]) -> std::io::Result<()> {
    // Order matters: number/padding prefix, then the payload, then the terminator.
    let pieces: [&[u8]; 3] = [prefix, line, b"\n"];
    for piece in pieces {
        writer.write_all(piece)?;
    }
    Ok(())
}
#[test]
fn test_stdin_non_utf8_preserved() {
    // Bytes that are not valid UTF-8 must pass through `nl` unchanged rather
    // than being replaced with U+FFFD; this matters for non-UTF-8 locales.
    let input: Vec<u8> = b"f\xe9vr.\n".to_vec(); // "févr." in Latin-1
    // Expected record: the line number right-aligned in the default
    // six-column field, the default tab separator, then the untouched bytes.
    let expected: Vec<u8> = b"     1\tf\xe9vr.\n".to_vec();
    new_ucmd!()
        .pipe_in(input)
        .succeeds()
        .stdout_is_bytes(expected);
}