Skip to content

Commit 74028ce

Browse files
committed
nl: support files with non-utf8 content
1 parent 2c9a27c commit 74028ce

File tree

2 files changed

+57
-23
lines changed

2 files changed

+57
-23
lines changed

src/uu/nl/src/nl.rs

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,7 @@ use std::fs::File;
99
use std::io::{BufRead, BufReader, Read, stdin};
1010
use std::path::Path;
1111
use uucore::error::{FromIo, UResult, USimpleError, set_exit_code};
12-
use uucore::translate;
13-
14-
use uucore::LocalizedCommand;
15-
use uucore::{format_usage, show_error};
12+
use uucore::{LocalizedCommand, format_usage, show_error, translate};
1613

1714
mod helper;
1815

@@ -79,7 +76,7 @@ enum NumberingStyle {
7976
All,
8077
NonEmpty,
8178
None,
82-
Regex(Box<regex::Regex>),
79+
Regex(Box<regex::bytes::Regex>),
8380
}
8481

8582
impl TryFrom<&str> for NumberingStyle {
@@ -90,7 +87,7 @@ impl TryFrom<&str> for NumberingStyle {
9087
"a" => Ok(Self::All),
9188
"t" => Ok(Self::NonEmpty),
9289
"n" => Ok(Self::None),
93-
_ if s.starts_with('p') => match regex::Regex::new(&s[1..]) {
90+
_ if s.starts_with('p') => match regex::bytes::Regex::new(&s[1..]) {
9491
Ok(re) => Ok(Self::Regex(Box::new(re))),
9592
Err(_) => Err(translate!("nl-error-invalid-regex")),
9693
},
@@ -143,19 +140,30 @@ enum SectionDelimiter {
143140
impl SectionDelimiter {
144141
/// A valid section delimiter contains the pattern one to three times,
145142
/// and nothing else.
146-
fn parse(s: &str, pattern: &str) -> Option<Self> {
147-
if s.is_empty() || pattern.is_empty() {
143+
fn parse(bytes: &[u8], pattern: &str) -> Option<Self> {
144+
let pattern = pattern.as_bytes();
145+
146+
if bytes.is_empty() || pattern.is_empty() || bytes.len() % pattern.len() != 0 {
148147
return None;
149148
}
150149

151-
let pattern_count = s.matches(pattern).count();
152-
let is_length_ok = pattern_count * pattern.len() == s.len();
150+
let count = bytes.len() / pattern.len();
151+
if !(1..=3).contains(&count) {
152+
return None;
153+
}
153154

154-
match (pattern_count, is_length_ok) {
155-
(3, true) => Some(Self::Header),
156-
(2, true) => Some(Self::Body),
157-
(1, true) => Some(Self::Footer),
158-
_ => None,
155+
if bytes
156+
.chunks_exact(pattern.len())
157+
.all(|chunk| chunk == pattern)
158+
{
159+
match count {
160+
1 => Some(Self::Footer),
161+
2 => Some(Self::Body),
162+
3 => Some(Self::Header),
163+
_ => unreachable!(),
164+
}
165+
} else {
166+
None
159167
}
160168
}
161169
}
@@ -338,9 +346,21 @@ pub fn uu_app() -> Command {
338346
/// `nl` implements the main functionality for an individual buffer.
339347
fn nl<T: Read>(reader: &mut BufReader<T>, stats: &mut Stats, settings: &Settings) -> UResult<()> {
340348
let mut current_numbering_style = &settings.body_numbering;
349+
let mut line = Vec::new();
350+
351+
loop {
352+
line.clear();
353+
// reads up to and including b'\n'; returns 0 on EOF
354+
let n = reader
355+
.read_until(b'\n', &mut line)
356+
.map_err_context(|| translate!("nl-error-could-not-read-line"))?;
357+
if n == 0 {
358+
break;
359+
}
341360

342-
for line in reader.lines() {
343-
let line = line.map_err_context(|| translate!("nl-error-could-not-read-line"))?;
361+
if line.last().copied() == Some(b'\n') {
362+
line.pop();
363+
}
344364

345365
if line.is_empty() {
346366
stats.consecutive_empty_lines += 1;
@@ -387,11 +407,12 @@ fn nl<T: Read>(reader: &mut BufReader<T>, stats: &mut Stats, settings: &Settings
387407
));
388408
};
389409
println!(
390-
"{}{}{line}",
410+
"{}{}{}",
391411
settings
392412
.number_format
393413
.format(line_number, settings.number_width),
394414
settings.number_separator.to_string_lossy(),
415+
String::from_utf8_lossy(&line),
395416
);
396417
// update line number for the potential next line
397418
match line_number.checked_add(settings.line_increment) {
@@ -400,7 +421,7 @@ fn nl<T: Read>(reader: &mut BufReader<T>, stats: &mut Stats, settings: &Settings
400421
}
401422
} else {
402423
let spaces = " ".repeat(settings.number_width + 1);
403-
println!("{spaces}{line}");
424+
println!("{spaces}{}", String::from_utf8_lossy(&line));
404425
}
405426
}
406427
}

tests/by-util/test_nl.rs

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,7 @@
44
// file that was distributed with this source code.
55
//
66
// spell-checker:ignore binvalid finvalid hinvalid iinvalid linvalid nabcabc nabcabcabc ninvalid vinvalid winvalid dabc näää
7-
use uutests::at_and_ucmd;
8-
use uutests::new_ucmd;
9-
use uutests::util::TestScenario;
10-
use uutests::util_name;
7+
use uutests::{at_and_ucmd, new_ucmd, util::TestScenario, util_name};
118

129
#[test]
1310
#[cfg(target_os = "linux")]
@@ -702,3 +699,19 @@ fn test_directory_as_input() {
702699
.stderr_is(format!("nl: {dir}: Is a directory\n"))
703700
.stdout_contains(content);
704701
}
702+
703+
#[test]
704+
fn test_file_with_non_utf8_content() {
705+
let (at, mut ucmd) = at_and_ucmd!();
706+
707+
let filename = "file";
708+
let content: &[u8] = b"a\n\xFF\xFE\nb";
709+
let invalid_utf8: &[u8] = b"\xFF\xFE";
710+
711+
at.write_bytes(filename, content);
712+
713+
ucmd.arg(filename).succeeds().stdout_is(format!(
714+
" 1\ta\n 2\t{}\n 3\tb\n",
715+
String::from_utf8_lossy(invalid_utf8)
716+
));
717+
}

0 commit comments

Comments
 (0)