Skip to content

Commit 933230e

Browse files
authored
Merge pull request #100 from kov/cmp-mem
cmp: print verbose diffs as we find them
2 parents 0bf04b4 + a316262 commit 933230e

File tree

2 files changed

+93
-99
lines changed

2 files changed

+93
-99
lines changed

src/cmp.rs

Lines changed: 92 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use std::ffi::OsString;
99
use std::io::{BufRead, BufReader, BufWriter, Read, Write};
1010
use std::iter::Peekable;
1111
use std::process::ExitCode;
12-
use std::{fs, io};
12+
use std::{cmp, fs, io};
1313

1414
#[cfg(not(target_os = "windows"))]
1515
use std::os::fd::{AsRawFd, FromRawFd};
@@ -320,10 +320,35 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
320320
let mut from = prepare_reader(&params.from, &params.skip_a, params)?;
321321
let mut to = prepare_reader(&params.to, &params.skip_b, params)?;
322322

323+
let mut offset_width = params.max_bytes.unwrap_or(usize::MAX);
324+
325+
if let (Ok(a_meta), Ok(b_meta)) = (fs::metadata(&params.from), fs::metadata(&params.to)) {
326+
#[cfg(not(target_os = "windows"))]
327+
let (a_size, b_size) = (a_meta.size(), b_meta.size());
328+
329+
#[cfg(target_os = "windows")]
330+
let (a_size, b_size) = (a_meta.file_size(), b_meta.file_size());
331+
332+
// If the files have different sizes, we already know they are not identical. If we have not
333+
// been asked to show even the first difference, we can quit early.
334+
if params.quiet && a_size != b_size {
335+
return Ok(Cmp::Different);
336+
}
337+
338+
let smaller = cmp::min(a_size, b_size) as usize;
339+
offset_width = cmp::min(smaller, offset_width);
340+
}
341+
342+
let offset_width = 1 + offset_width.checked_ilog10().unwrap_or(1) as usize;
343+
344+
// Capacity calc: at_byte width + 2 x 3-byte octal numbers + 2 x 4-byte value + 4 spaces
345+
let mut output = Vec::<u8>::with_capacity(offset_width + 3 * 2 + 4 * 2 + 4);
346+
323347
let mut at_byte = 1;
324348
let mut at_line = 1;
325349
let mut start_of_line = true;
326-
let mut verbose_diffs = vec![];
350+
let mut stdout = BufWriter::new(io::stdout().lock());
351+
let mut compare = Cmp::Equal;
327352
loop {
328353
// Fill up our buffers.
329354
let from_buf = match from.fill_buf() {
@@ -360,10 +385,6 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
360385
&params.to.to_string_lossy()
361386
};
362387

363-
if params.verbose {
364-
report_verbose_diffs(verbose_diffs, params)?;
365-
}
366-
367388
report_eof(at_byte, at_line, start_of_line, eof_on, params);
368389
return Ok(Cmp::Different);
369390
}
@@ -395,8 +416,24 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
395416
// first one runs out.
396417
for (&from_byte, &to_byte) in from_buf.iter().zip(to_buf.iter()) {
397418
if from_byte != to_byte {
419+
compare = Cmp::Different;
420+
398421
if params.verbose {
399-
verbose_diffs.push((at_byte, from_byte, to_byte));
422+
format_verbose_difference(
423+
from_byte,
424+
to_byte,
425+
at_byte,
426+
offset_width,
427+
&mut output,
428+
params,
429+
)?;
430+
stdout.write_all(output.as_slice()).map_err(|e| {
431+
format!(
432+
"{}: error printing output: {e}",
433+
params.executable.to_string_lossy()
434+
)
435+
})?;
436+
output.clear();
400437
} else {
401438
report_difference(from_byte, to_byte, at_byte, at_line, params);
402439
return Ok(Cmp::Different);
@@ -422,12 +459,7 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
422459
to.consume(consumed);
423460
}
424461

425-
if params.verbose && !verbose_diffs.is_empty() {
426-
report_verbose_diffs(verbose_diffs, params)?;
427-
return Ok(Cmp::Different);
428-
}
429-
430-
Ok(Cmp::Equal)
462+
Ok(compare)
431463
}
432464

433465
// Exit codes are documented at
@@ -450,21 +482,6 @@ pub fn main(opts: Peekable<ArgsOs>) -> ExitCode {
450482
return ExitCode::SUCCESS;
451483
}
452484

453-
// If the files have different sizes, we already know they are not identical. If we have not
454-
// been asked to show even the first difference, we can quit early.
455-
if params.quiet {
456-
if let (Ok(a_meta), Ok(b_meta)) = (fs::metadata(&params.from), fs::metadata(&params.to)) {
457-
#[cfg(not(target_os = "windows"))]
458-
if a_meta.size() != b_meta.size() {
459-
return ExitCode::from(1);
460-
}
461-
#[cfg(target_os = "windows")]
462-
if a_meta.file_size() != b_meta.file_size() {
463-
return ExitCode::from(1);
464-
}
465-
}
466-
}
467-
468485
match cmp(&params) {
469486
Ok(Cmp::Equal) => ExitCode::SUCCESS,
470487
Ok(Cmp::Different) => ExitCode::from(1),
@@ -533,99 +550,76 @@ fn format_byte(byte: u8) -> String {
533550
// This function has been optimized to not use the Rust fmt system, which
534551
// leads to a massive speed up when processing large files: cuts the time
535552
// for comparing 2 ~36MB completely different files in half on an M1 Max.
536-
fn report_verbose_diffs(diffs: Vec<(usize, u8, u8)>, params: &Params) -> Result<(), String> {
553+
#[inline]
554+
fn format_verbose_difference(
555+
from_byte: u8,
556+
to_byte: u8,
557+
at_byte: usize,
558+
offset_width: usize,
559+
output: &mut Vec<u8>,
560+
params: &Params,
561+
) -> Result<(), String> {
537562
assert!(!params.quiet);
538563

539-
let mut stdout = BufWriter::new(io::stdout().lock());
540-
if let Some((offset, _, _)) = diffs.last() {
541-
// Obtain the width of the first column from the last byte offset.
542-
let width = format!("{}", offset).len();
543-
544-
let mut at_byte_buf = itoa::Buffer::new();
545-
let mut from_oct = [0u8; 3]; // for octal conversions
546-
let mut to_oct = [0u8; 3];
547-
548-
// Capacity calc: at_byte width + 2 x 3-byte octal numbers + 4-byte value + up to 2 byte value + 4 spaces
549-
let mut output = Vec::<u8>::with_capacity(width + 3 * 2 + 4 + 2 + 4);
550-
551-
if params.print_bytes {
552-
for (at_byte, from_byte, to_byte) in diffs {
553-
output.clear();
554-
555-
// "{:>width$} {:>3o} {:4} {:>3o} {}",
556-
let at_byte_str = at_byte_buf.format(at_byte);
557-
let at_byte_padding = width - at_byte_str.len();
564+
let mut at_byte_buf = itoa::Buffer::new();
565+
let mut from_oct = [0u8; 3]; // for octal conversions
566+
let mut to_oct = [0u8; 3];
558567

559-
for _ in 0..at_byte_padding {
560-
output.push(b' ')
561-
}
562-
563-
output.extend_from_slice(at_byte_str.as_bytes());
564-
565-
output.push(b' ');
568+
if params.print_bytes {
569+
// "{:>width$} {:>3o} {:4} {:>3o} {}",
570+
let at_byte_str = at_byte_buf.format(at_byte);
571+
let at_byte_padding = offset_width.saturating_sub(at_byte_str.len());
566572

567-
output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes());
573+
for _ in 0..at_byte_padding {
574+
output.push(b' ')
575+
}
568576

569-
output.push(b' ');
577+
output.extend_from_slice(at_byte_str.as_bytes());
570578

571-
let from_byte_str = format_byte(from_byte);
572-
let from_byte_padding = 4 - from_byte_str.len();
579+
output.push(b' ');
573580

574-
output.extend_from_slice(from_byte_str.as_bytes());
581+
output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes());
575582

576-
for _ in 0..from_byte_padding {
577-
output.push(b' ')
578-
}
583+
output.push(b' ');
579584

580-
output.push(b' ');
585+
let from_byte_str = format_byte(from_byte);
586+
let from_byte_padding = 4 - from_byte_str.len();
581587

582-
output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes());
588+
output.extend_from_slice(from_byte_str.as_bytes());
583589

584-
output.push(b' ');
590+
for _ in 0..from_byte_padding {
591+
output.push(b' ')
592+
}
585593

586-
output.extend_from_slice(format_byte(to_byte).as_bytes());
594+
output.push(b' ');
587595

588-
output.push(b'\n');
596+
output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes());
589597

590-
stdout.write_all(output.as_slice()).map_err(|e| {
591-
format!(
592-
"{}: error printing output: {e}",
593-
params.executable.to_string_lossy()
594-
)
595-
})?;
596-
}
597-
} else {
598-
for (at_byte, from_byte, to_byte) in diffs {
599-
output.clear();
598+
output.push(b' ');
600599

601-
// "{:>width$} {:>3o} {:>3o}"
602-
let at_byte_str = at_byte_buf.format(at_byte);
603-
let at_byte_padding = width - at_byte_str.len();
600+
output.extend_from_slice(format_byte(to_byte).as_bytes());
604601

605-
for _ in 0..at_byte_padding {
606-
output.push(b' ')
607-
}
602+
output.push(b'\n');
603+
} else {
604+
// "{:>width$} {:>3o} {:>3o}"
605+
let at_byte_str = at_byte_buf.format(at_byte);
606+
let at_byte_padding = offset_width - at_byte_str.len();
608607

609-
output.extend_from_slice(at_byte_str.as_bytes());
608+
for _ in 0..at_byte_padding {
609+
output.push(b' ')
610+
}
610611

611-
output.push(b' ');
612+
output.extend_from_slice(at_byte_str.as_bytes());
612613

613-
output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes());
614+
output.push(b' ');
614615

615-
output.push(b' ');
616+
output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes());
616617

617-
output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes());
618+
output.push(b' ');
618619

619-
output.push(b'\n');
620+
output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes());
620621

621-
stdout.write_all(output.as_slice()).map_err(|e| {
622-
format!(
623-
"{}: error printing output: {e}",
624-
params.executable.to_string_lossy()
625-
)
626-
})?;
627-
}
628-
}
622+
output.push(b'\n');
629623
}
630624

631625
Ok(())

tests/integration.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -616,7 +616,7 @@ mod cmp {
616616
.code(predicate::eq(1))
617617
.failure()
618618
.stderr(predicate::str::is_empty())
619-
.stdout(predicate::eq("4 40 144 d\n8 40 150 h\n"));
619+
.stdout(predicate::eq(" 4 40 144 d\n 8 40 150 h\n"));
620620
Ok(())
621621
}
622622

0 commit comments

Comments
 (0)