Skip to content

Commit c56489e

Browse files
committed
cat: Performance improvement when printing line numbers
Add a simple struct that manually maintains a string representation of the line number for the `cat` application. Maintaining this string is much faster than converting a `usize` line-number variable to a string each time it's needed. This gives a significant performance improvement with the -n and -b flags.
1 parent e6ff6d5 commit c56489e

File tree

1 file changed

+85
-6
lines changed

1 file changed

+85
-6
lines changed

src/uu/cat/src/cat.rs

Lines changed: 85 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,64 @@ mod splice;
3333
const USAGE: &str = help_usage!("cat.md");
3434
const ABOUT: &str = help_about!("cat.md");
3535

36+
/// Pre-formatted line-number prefix used when numbering output lines.
///
/// The buffer always holds the current line number as ASCII decimal digits,
/// right-aligned with leading spaces to a minimum width of six columns and
/// terminated by a single tab -- i.e. the same bytes that
/// `write!(writer, "{0:6}\t", n)` would produce for the current number `n`.
///
/// Incrementing the digits in place is significantly faster than storing a
/// `usize` and running it through the standard formatting macros every time
/// a line number has to be printed.
struct LineNumber {
    /// ASCII representation of the current line number followed by `b'\t'`.
    buf: Vec<u8>,
}

impl LineNumber {
    /// Line number 1, right-aligned in six columns, followed by a tab.
    const INITIAL: &'static [u8] = b"     1\t";

    /// Creates a counter positioned at line 1 (`b"     1\t"`).
    fn new() -> Self {
        LineNumber {
            buf: Self::INITIAL.to_vec(),
        }
    }

    /// Advances the stored number by one.
    ///
    /// Padding spaces are consumed as the number gains digits; once all the
    /// columns are in use and every digit rolls over, the buffer grows by one
    /// byte (e.g. `b"999999\t"` becomes `b"1000000\t"`).
    fn increment(&mut self) {
        // `rev().skip(1)` walks from the least-significant digit towards the
        // front of the buffer while leaving the trailing b'\t' untouched.
        for ascii_digit in self.buf.iter_mut().rev().skip(1) {
            // If/else is kept instead of `match`: it was measured to be faster
            // (benchmarked Apr-2025), probably because the most likely digits
            // are tested first.
            if (b'0'..=b'8').contains(ascii_digit) {
                // No carry: the more significant digits are unaffected.
                *ascii_digit += 1;
                return;
            } else if b'9' == *ascii_digit {
                // Carry into the next, more significant, position.
                *ascii_digit = b'0';
            } else {
                // A padding space behaves like a leading zero, so the carry
                // ends here by turning it into a b'1'.
                assert_eq!(*ascii_digit, b' ');
                *ascii_digit = b'1';
                return;
            }
        }

        // The loop ran out of positions without absorbing the carry, so every
        // digit was a b'9' and the buffer is now [b'0', ..., b'0', b'\t'].
        debug_assert_eq!(self.buf.last(), Some(&b'\t'));
        debug_assert!(self.buf[..self.buf.len() - 1].iter().all(|&b| b == b'0'));

        // Prepending a b'1' yields the next power of ten.
        self.buf.insert(0, b'1');
    }

    /// Writes the current line number, padding and trailing tab to `writer`.
    ///
    /// # Errors
    ///
    /// Returns any error reported by `writer.write_all`.
    fn write(&self, writer: &mut impl Write) -> std::io::Result<()> {
        writer.write_all(&self.buf)
    }
}
93+
3694
#[derive(Error, Debug)]
3795
enum CatError {
3896
/// Wrapper around `io::Error`
@@ -106,7 +164,7 @@ impl OutputOptions {
106164
/// when we can't write fast.
107165
struct OutputState {
108166
/// The current line number
109-
line_number: usize,
167+
line_number: LineNumber,
110168

111169
/// Whether the output cursor is at the beginning of a new line
112170
at_line_start: bool,
@@ -390,7 +448,7 @@ fn cat_files(files: &[String], options: &OutputOptions) -> UResult<()> {
390448
let out_info = FileInformation::from_file(&std::io::stdout()).ok();
391449

392450
let mut state = OutputState {
393-
line_number: 1,
451+
line_number: LineNumber::new(),
394452
at_line_start: true,
395453
skipped_carriage_return: false,
396454
one_blank_kept: false,
@@ -529,8 +587,8 @@ fn write_lines<R: FdReadable>(
529587
}
530588
state.one_blank_kept = false;
531589
if state.at_line_start && options.number != NumberingMode::None {
532-
write!(writer, "{0:6}\t", state.line_number)?;
533-
state.line_number += 1;
590+
state.line_number.write(&mut writer)?;
591+
state.line_number.increment();
534592
}
535593

536594
// print to end of line or end of buffer
@@ -589,8 +647,8 @@ fn write_new_line<W: Write>(
589647
if !state.at_line_start || !options.squeeze_blank || !state.one_blank_kept {
590648
state.one_blank_kept = true;
591649
if state.at_line_start && options.number == NumberingMode::All {
592-
write!(writer, "{0:6}\t", state.line_number)?;
593-
state.line_number += 1;
650+
state.line_number.write(writer)?;
651+
state.line_number.increment();
594652
}
595653
write_end_of_line(writer, options.end_of_line().as_bytes(), is_interactive)?;
596654
}
@@ -743,4 +801,25 @@ mod tests {
743801
assert_eq!(writer.buffer(), [b'^', byte + 64]);
744802
}
745803
}
804+
805+
/// Checks that `LineNumber` starts at line 1, keeps the number right-aligned
/// in six columns (the layout of `"{0:6}\t"`), and grows the buffer once the
/// number no longer fits in six digits.
#[test]
fn test_incrementing_string() {
    let mut incrementing_string = super::LineNumber::new();
    assert_eq!(b"     1\t", incrementing_string.buf.as_slice());
    incrementing_string.increment();
    assert_eq!(b"     2\t", incrementing_string.buf.as_slice());
    // Run through to 100
    for _ in 3..=100 {
        incrementing_string.increment();
    }
    assert_eq!(b"   100\t", incrementing_string.buf.as_slice());
    // Run through until we overflow the original size.
    for _ in 101..=1_000_000 {
        incrementing_string.increment();
    }
    // Confirm that the buffer expands when we overflow the original size.
    assert_eq!(b"1000000\t", incrementing_string.buf.as_slice());
    incrementing_string.increment();
    assert_eq!(b"1000001\t", incrementing_string.buf.as_slice());
}
746825
}

0 commit comments

Comments
 (0)