Skip to content

Commit 6b23e6f

Browse files
authored
Merge pull request #9673 from ChrisDryden/fix-nl-preserve-bytes
nl: preserve raw bytes in output instead of using from_utf8_lossy
2 parents 0a1ae35 + 93c8d54 commit 6b23e6f

File tree

2 files changed

+40
-29
lines changed

2 files changed

+40
-29
lines changed

src/uu/nl/src/nl.rs

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -345,6 +345,13 @@ pub fn uu_app() -> Command {
345345
)
346346
}
347347

348+
/// Writes `prefix`, then `line`, then a trailing newline to `writer` as raw bytes (no UTF-8 conversion), returning the first I/O error.
349+
fn write_line(writer: &mut impl Write, prefix: &[u8], line: &[u8]) -> std::io::Result<()> {
350+
writer.write_all(prefix)?;
351+
writer.write_all(line)?;
352+
writeln!(writer)
353+
}
354+
348355
/// `nl` implements the main functionality for an individual buffer.
349356
fn nl<T: Read>(reader: &mut BufReader<T>, stats: &mut Stats, settings: &Settings) -> UResult<()> {
350357
let mut writer = BufWriter::new(stdout());
@@ -409,24 +416,17 @@ fn nl<T: Read>(reader: &mut BufReader<T>, stats: &mut Stats, settings: &Settings
409416
translate!("nl-error-line-number-overflow"),
410417
));
411418
};
412-
writeln!(
413-
writer,
414-
"{}{}{}",
415-
settings
416-
.number_format
417-
.format(line_number, settings.number_width),
418-
settings.number_separator.to_string_lossy(),
419-
String::from_utf8_lossy(&line),
420-
)
421-
.map_err_context(|| translate!("nl-error-could-not-write"))?;
422-
// update line number for the potential next line
423-
match line_number.checked_add(settings.line_increment) {
424-
Some(new_line_number) => stats.line_number = Some(new_line_number),
425-
None => stats.line_number = None, // overflow
426-
}
419+
let mut prefix = settings
420+
.number_format
421+
.format(line_number, settings.number_width)
422+
.into_bytes();
423+
prefix.extend_from_slice(settings.number_separator.as_encoded_bytes());
424+
write_line(&mut writer, &prefix, &line)
425+
.map_err_context(|| translate!("nl-error-could-not-write"))?;
426+
stats.line_number = line_number.checked_add(settings.line_increment);
427427
} else {
428-
let spaces = " ".repeat(settings.number_width + 1);
429-
writeln!(writer, "{spaces}{}", String::from_utf8_lossy(&line))
428+
let prefix = " ".repeat(settings.number_width + 1);
429+
write_line(&mut writer, prefix.as_bytes(), &line)
430430
.map_err_context(|| translate!("nl-error-could-not-write"))?;
431431
}
432432
}

tests/by-util/test_nl.rs

Lines changed: 23 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
55
//
6-
// spell-checker:ignore binvalid finvalid hinvalid iinvalid linvalid nabcabc nabcabcabc ninvalid vinvalid winvalid dabc näää
6+
// spell-checker:ignore binvalid finvalid hinvalid iinvalid linvalid nabcabc nabcabcabc ninvalid vinvalid winvalid dabc näää févr
77
use uutests::{at_and_ucmd, new_ucmd, util::TestScenario, util_name};
88

99
#[test]
@@ -209,23 +209,24 @@ fn test_number_separator() {
209209
#[test]
210210
#[cfg(target_os = "linux")]
211211
fn test_number_separator_non_utf8() {
212-
use std::{
213-
ffi::{OsStr, OsString},
214-
os::unix::ffi::{OsStrExt, OsStringExt},
215-
};
212+
use std::{ffi::OsString, os::unix::ffi::OsStringExt};
216213

217214
let separator_bytes = [0xFF, 0xFE];
218215
let mut v = b"--number-separator=".to_vec();
219216
v.extend_from_slice(&separator_bytes);
220217

221218
let arg = OsString::from_vec(v);
222-
let separator = OsStr::from_bytes(&separator_bytes);
219+
220+
// Raw bytes should be preserved in the separator output
221+
let mut expected = b" 1".to_vec();
222+
expected.extend_from_slice(&separator_bytes);
223+
expected.extend_from_slice(b"test\n");
223224

224225
new_ucmd!()
225226
.arg(arg)
226227
.pipe_in("test")
227228
.succeeds()
228-
.stdout_is(format!(" 1{}test\n", separator.to_string_lossy()));
229+
.stdout_is_bytes(expected);
229230
}
230231

231232
#[test]
@@ -791,14 +792,24 @@ fn test_file_with_non_utf8_content() {
791792

792793
let filename = "file";
793794
let content: &[u8] = b"a\n\xFF\xFE\nb";
794-
let invalid_utf8: &[u8] = b"\xFF\xFE";
795795

796796
at.write_bytes(filename, content);
797797

798-
ucmd.arg(filename).succeeds().stdout_is(format!(
799-
" 1\ta\n 2\t{}\n 3\tb\n",
800-
String::from_utf8_lossy(invalid_utf8)
801-
));
798+
// Raw bytes should be preserved in output (not converted to UTF-8 replacement chars)
799+
let expected: Vec<u8> = b" 1\ta\n 2\t\xFF\xFE\n 3\tb\n".to_vec();
800+
ucmd.arg(filename).succeeds().stdout_is_bytes(expected);
801+
}
802+
803+
#[test]
804+
fn test_stdin_non_utf8_preserved() {
805+
// Input piped on stdin that is not valid UTF-8 must appear byte-for-byte in the output, not as U+FFFD replacement characters.
806+
// This matters for locale compatibility: the lone 0xE9 byte below is not valid UTF-8 on its own.
807+
let input: Vec<u8> = b"f\xe9vr.\n".to_vec(); // "févr." in Latin-1
808+
let expected: Vec<u8> = b" 1\tf\xe9vr.\n".to_vec();
809+
new_ucmd!()
810+
.pipe_in(input)
811+
.succeeds()
812+
.stdout_is_bytes(expected);
802813
}
803814

804815
// Regression tests for issue #9132: repeated flags should use last value

0 commit comments

Comments
 (0)