Skip to content

Commit c0fdbc4

Browse files
committed
cmp: stop allocating for byte printing
This makes verbose comparison of two completely different 37 MB files 2.34x faster than our own baseline, putting our cmp at almost 6x faster than GNU cmp (/opt/homebrew/bin/cmp) on my M4 Pro Mac. The output remains identical to that of GNU cmp. Mostly equal and smaller files do not regress.

Benchmark 1: ./bin/baseline/diffutils cmp -lb t/huge t/eguh
  Time (mean ± σ): 1.669 s ± 0.011 s [User: 1.594 s, System: 0.073 s]
  Range (min … max): 1.654 s … 1.689 s 10 runs
  Warning: Ignoring non-zero exit code.

Benchmark 2: ./target/release/diffutils cmp -lb t/huge t/eguh
  Time (mean ± σ): 714.2 ms ± 4.1 ms [User: 629.3 ms, System: 82.7 ms]
  Range (min … max): 707.2 ms … 721.5 ms 10 runs
  Warning: Ignoring non-zero exit code.

Benchmark 3: /opt/homebrew/bin/cmp -lb t/huge t/eguh
  Time (mean ± σ): 4.213 s ± 0.050 s [User: 4.128 s, System: 0.081 s]
  Range (min … max): 4.160 s … 4.316 s 10 runs
  Warning: Ignoring non-zero exit code.

Benchmark 4: /usr/bin/cmp -lb t/huge t/eguh
  Time (mean ± σ): 3.892 s ± 0.048 s [User: 3.819 s, System: 0.070 s]
  Range (min … max): 3.808 s … 3.976 s 10 runs
  Warning: Ignoring non-zero exit code.

Summary
  ./target/release/diffutils cmp -lb t/huge t/eguh ran
    2.34 ± 0.02 times faster than ./bin/baseline/diffutils cmp -lb t/huge t/eguh
    5.45 ± 0.07 times faster than /usr/bin/cmp -lb t/huge t/eguh
    5.90 ± 0.08 times faster than /opt/homebrew/bin/cmp -lb t/huge t/eguh
1 parent 7ddc6c6 commit c0fdbc4

File tree

1 file changed

+59
-37
lines changed

1 file changed

+59
-37
lines changed

src/cmp.rs

Lines changed: 59 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -500,12 +500,6 @@ pub fn main(opts: Peekable<ArgsOs>) -> ExitCode {
500500
}
501501
}
502502

503-
#[inline]
504-
fn is_ascii_printable(byte: u8) -> bool {
505-
let c = byte as char;
506-
c.is_ascii() && !c.is_ascii_control()
507-
}
508-
509503
#[inline]
510504
fn format_octal(byte: u8, buf: &mut [u8; 3]) -> &str {
511505
*buf = [b' ', b' ', b'0'];
@@ -525,32 +519,67 @@ fn format_octal(byte: u8, buf: &mut [u8; 3]) -> &str {
525519
}
526520

527521
#[inline]
528-
fn format_byte(byte: u8) -> String {
529-
let mut byte = byte;
530-
let mut quoted = vec![];
531-
532-
if !is_ascii_printable(byte) {
533-
if byte >= 128 {
534-
quoted.push(b'M');
535-
quoted.push(b'-');
536-
byte -= 128;
522+
fn write_visible_byte(output: &mut Vec<u8>, byte: u8) -> usize {
523+
match byte {
524+
// Control characters: ^@, ^A, ..., ^_
525+
0..=31 => {
526+
output.push(b'^');
527+
output.push(byte + 64);
528+
2
537529
}
538-
539-
if byte < 32 {
540-
quoted.push(b'^');
541-
byte += 64;
542-
} else if byte == 127 {
543-
quoted.push(b'^');
544-
byte = b'?';
530+
// Printable ASCII (space through ~)
531+
32..=126 => {
532+
output.push(byte);
533+
1
534+
}
535+
// DEL: ^?
536+
127 => {
537+
output.extend_from_slice(b"^?");
538+
2
539+
}
540+
// High bytes with control equivalents: M-^@, M-^A, ..., M-^_
541+
128..=159 => {
542+
output.push(b'M');
543+
output.push(b'-');
544+
output.push(b'^');
545+
output.push(byte - 64);
546+
4
547+
}
548+
// High bytes: M-<space>, M-!, ..., M-~
549+
160..=254 => {
550+
output.push(b'M');
551+
output.push(b'-');
552+
output.push(byte - 128);
553+
3
554+
}
555+
// Byte 255: M-^?
556+
255 => {
557+
output.extend_from_slice(b"M-^?");
558+
4
545559
}
546-
assert!((byte as char).is_ascii());
547560
}
561+
}
548562

549-
quoted.push(byte);
563+
/// Writes a byte in visible form with right-padding to the specified width
564+
#[inline]
565+
fn write_visible_byte_padded(output: &mut Vec<u8>, byte: u8, width: usize) {
566+
let display_width = write_visible_byte(output, byte);
550567

551-
// SAFETY: the checks and shifts we do above match what cat and GNU
568+
// Add right-padding spaces
569+
let padding = width.saturating_sub(display_width);
570+
for _ in 0..padding {
571+
output.push(b' ');
572+
}
573+
}
574+
575+
/// Formats a byte as a visible string (for non-performance-critical path)
576+
#[inline]
577+
fn format_visible_byte(byte: u8) -> String {
578+
let mut result = Vec::with_capacity(4);
579+
write_visible_byte(&mut result, byte);
580+
// SAFETY: the checks and shifts in write_visible_byte match what cat and GNU
552581
// cmp do to ensure characters fall inside the ascii range.
553-
unsafe { String::from_utf8_unchecked(quoted) }
582+
unsafe { String::from_utf8_unchecked(result) }
554583
}
555584

556585
// This function has been optimized to not use the Rust fmt system, which
@@ -588,22 +617,15 @@ fn format_verbose_difference(
588617

589618
output.push(b' ');
590619

591-
let from_byte_str = format_byte(from_byte);
592-
let from_byte_padding = 4 - from_byte_str.len();
593-
594-
output.extend_from_slice(from_byte_str.as_bytes());
595-
596-
for _ in 0..from_byte_padding {
597-
output.push(b' ')
598-
}
620+
write_visible_byte_padded(output, from_byte, 4);
599621

600622
output.push(b' ');
601623

602624
output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes());
603625

604626
output.push(b' ');
605627

606-
output.extend_from_slice(format_byte(to_byte).as_bytes());
628+
write_visible_byte(output, to_byte);
607629

608630
output.push(b'\n');
609631
} else {
@@ -706,9 +728,9 @@ fn report_difference(from_byte: u8, to_byte: u8, at_byte: usize, at_line: usize,
706728
print!(
707729
" is {:>3o} {:char_width$} {:>3o} {:char_width$}",
708730
from_byte,
709-
format_byte(from_byte),
731+
format_visible_byte(from_byte),
710732
to_byte,
711-
format_byte(to_byte)
733+
format_visible_byte(to_byte)
712734
);
713735
}
714736
println!();

0 commit comments

Comments
 (0)