@@ -9,7 +9,7 @@ use std::ffi::OsString;
99use std:: io:: { BufRead , BufReader , BufWriter , Read , Write } ;
1010use std:: iter:: Peekable ;
1111use std:: process:: ExitCode ;
12- use std:: { fs, io} ;
12+ use std:: { cmp , fs, io} ;
1313
1414#[ cfg( not( target_os = "windows" ) ) ]
1515use std:: os:: fd:: { AsRawFd , FromRawFd } ;
@@ -320,10 +320,35 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
320320 let mut from = prepare_reader ( & params. from , & params. skip_a , params) ?;
321321 let mut to = prepare_reader ( & params. to , & params. skip_b , params) ?;
322322
323+ let mut offset_width = params. max_bytes . unwrap_or ( usize:: MAX ) ;
324+
325+ if let ( Ok ( a_meta) , Ok ( b_meta) ) = ( fs:: metadata ( & params. from ) , fs:: metadata ( & params. to ) ) {
326+ #[ cfg( not( target_os = "windows" ) ) ]
327+ let ( a_size, b_size) = ( a_meta. size ( ) , b_meta. size ( ) ) ;
328+
329+ #[ cfg( target_os = "windows" ) ]
330+ let ( a_size, b_size) = ( a_meta. file_size ( ) , b_meta. file_size ( ) ) ;
331+
332+ // If the files have different sizes, we already know they are not identical. If we have not
333+ // been asked to show even the first difference, we can quit early.
334+ if params. quiet && a_size != b_size {
335+ return Ok ( Cmp :: Different ) ;
336+ }
337+
338+ let smaller = cmp:: min ( a_size, b_size) as usize ;
339+ offset_width = cmp:: min ( smaller, offset_width) ;
340+ }
341+
342+ let offset_width = 1 + offset_width. checked_ilog10 ( ) . unwrap_or ( 1 ) as usize ;
343+
344+ // Capacity calc: at_byte width + 2 x 3-byte octal numbers + 2 x 4-byte value + 4 spaces
345+ let mut output = Vec :: < u8 > :: with_capacity ( offset_width + 3 * 2 + 4 * 2 + 4 ) ;
346+
323347 let mut at_byte = 1 ;
324348 let mut at_line = 1 ;
325349 let mut start_of_line = true ;
326- let mut verbose_diffs = vec ! [ ] ;
350+ let mut stdout = BufWriter :: new ( io:: stdout ( ) . lock ( ) ) ;
351+ let mut compare = Cmp :: Equal ;
327352 loop {
328353 // Fill up our buffers.
329354 let from_buf = match from. fill_buf ( ) {
@@ -360,10 +385,6 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
360385 & params. to . to_string_lossy ( )
361386 } ;
362387
363- if params. verbose {
364- report_verbose_diffs ( verbose_diffs, params) ?;
365- }
366-
367388 report_eof ( at_byte, at_line, start_of_line, eof_on, params) ;
368389 return Ok ( Cmp :: Different ) ;
369390 }
@@ -395,8 +416,24 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
395416 // first one runs out.
396417 for ( & from_byte, & to_byte) in from_buf. iter ( ) . zip ( to_buf. iter ( ) ) {
397418 if from_byte != to_byte {
419+ compare = Cmp :: Different ;
420+
398421 if params. verbose {
399- verbose_diffs. push ( ( at_byte, from_byte, to_byte) ) ;
422+ format_verbose_difference (
423+ from_byte,
424+ to_byte,
425+ at_byte,
426+ offset_width,
427+ & mut output,
428+ params,
429+ ) ?;
430+ stdout. write_all ( output. as_slice ( ) ) . map_err ( |e| {
431+ format ! (
432+ "{}: error printing output: {e}" ,
433+ params. executable. to_string_lossy( )
434+ )
435+ } ) ?;
436+ output. clear ( ) ;
400437 } else {
401438 report_difference ( from_byte, to_byte, at_byte, at_line, params) ;
402439 return Ok ( Cmp :: Different ) ;
@@ -422,12 +459,7 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
422459 to. consume ( consumed) ;
423460 }
424461
425- if params. verbose && !verbose_diffs. is_empty ( ) {
426- report_verbose_diffs ( verbose_diffs, params) ?;
427- return Ok ( Cmp :: Different ) ;
428- }
429-
430- Ok ( Cmp :: Equal )
462+ Ok ( compare)
431463}
432464
433465// Exit codes are documented at
@@ -450,21 +482,6 @@ pub fn main(opts: Peekable<ArgsOs>) -> ExitCode {
450482 return ExitCode :: SUCCESS ;
451483 }
452484
453- // If the files have different sizes, we already know they are not identical. If we have not
454- // been asked to show even the first difference, we can quit early.
455- if params. quiet {
456- if let ( Ok ( a_meta) , Ok ( b_meta) ) = ( fs:: metadata ( & params. from ) , fs:: metadata ( & params. to ) ) {
457- #[ cfg( not( target_os = "windows" ) ) ]
458- if a_meta. size ( ) != b_meta. size ( ) {
459- return ExitCode :: from ( 1 ) ;
460- }
461- #[ cfg( target_os = "windows" ) ]
462- if a_meta. file_size ( ) != b_meta. file_size ( ) {
463- return ExitCode :: from ( 1 ) ;
464- }
465- }
466- }
467-
468485 match cmp ( & params) {
469486 Ok ( Cmp :: Equal ) => ExitCode :: SUCCESS ,
470487 Ok ( Cmp :: Different ) => ExitCode :: from ( 1 ) ,
@@ -533,99 +550,76 @@ fn format_byte(byte: u8) -> String {
533550// This function has been optimized to not use the Rust fmt system, which
534551// leads to a massive speed up when processing large files: cuts the time
535552// for comparing 2 ~36MB completely different files in half on an M1 Max.
536- fn report_verbose_diffs ( diffs : Vec < ( usize , u8 , u8 ) > , params : & Params ) -> Result < ( ) , String > {
553+ #[ inline]
554+ fn format_verbose_difference (
555+ from_byte : u8 ,
556+ to_byte : u8 ,
557+ at_byte : usize ,
558+ offset_width : usize ,
559+ output : & mut Vec < u8 > ,
560+ params : & Params ,
561+ ) -> Result < ( ) , String > {
537562 assert ! ( !params. quiet) ;
538563
539- let mut stdout = BufWriter :: new ( io:: stdout ( ) . lock ( ) ) ;
540- if let Some ( ( offset, _, _) ) = diffs. last ( ) {
541- // Obtain the width of the first column from the last byte offset.
542- let width = format ! ( "{}" , offset) . len ( ) ;
543-
544- let mut at_byte_buf = itoa:: Buffer :: new ( ) ;
545- let mut from_oct = [ 0u8 ; 3 ] ; // for octal conversions
546- let mut to_oct = [ 0u8 ; 3 ] ;
547-
548- // Capacity calc: at_byte width + 2 x 3-byte octal numbers + 4-byte value + up to 2 byte value + 4 spaces
549- let mut output = Vec :: < u8 > :: with_capacity ( width + 3 * 2 + 4 + 2 + 4 ) ;
550-
551- if params. print_bytes {
552- for ( at_byte, from_byte, to_byte) in diffs {
553- output. clear ( ) ;
554-
555- // "{:>width$} {:>3o} {:4} {:>3o} {}",
556- let at_byte_str = at_byte_buf. format ( at_byte) ;
557- let at_byte_padding = width - at_byte_str. len ( ) ;
564+ let mut at_byte_buf = itoa:: Buffer :: new ( ) ;
565+ let mut from_oct = [ 0u8 ; 3 ] ; // for octal conversions
566+ let mut to_oct = [ 0u8 ; 3 ] ;
558567
559- for _ in 0 ..at_byte_padding {
560- output. push ( b' ' )
561- }
562-
563- output. extend_from_slice ( at_byte_str. as_bytes ( ) ) ;
564-
565- output. push ( b' ' ) ;
568+ if params. print_bytes {
569+ // "{:>width$} {:>3o} {:4} {:>3o} {}",
570+ let at_byte_str = at_byte_buf. format ( at_byte) ;
571+ let at_byte_padding = offset_width. saturating_sub ( at_byte_str. len ( ) ) ;
566572
567- output. extend_from_slice ( format_octal ( from_byte, & mut from_oct) . as_bytes ( ) ) ;
573+ for _ in 0 ..at_byte_padding {
574+ output. push ( b' ' )
575+ }
568576
569- output. push ( b' ' ) ;
577+ output. extend_from_slice ( at_byte_str . as_bytes ( ) ) ;
570578
571- let from_byte_str = format_byte ( from_byte) ;
572- let from_byte_padding = 4 - from_byte_str. len ( ) ;
579+ output. push ( b' ' ) ;
573580
574- output. extend_from_slice ( from_byte_str . as_bytes ( ) ) ;
581+ output. extend_from_slice ( format_octal ( from_byte , & mut from_oct ) . as_bytes ( ) ) ;
575582
576- for _ in 0 ..from_byte_padding {
577- output. push ( b' ' )
578- }
583+ output. push ( b' ' ) ;
579584
580- output. push ( b' ' ) ;
585+ let from_byte_str = format_byte ( from_byte) ;
586+ let from_byte_padding = 4 - from_byte_str. len ( ) ;
581587
582- output. extend_from_slice ( format_octal ( to_byte , & mut to_oct ) . as_bytes ( ) ) ;
588+ output. extend_from_slice ( from_byte_str . as_bytes ( ) ) ;
583589
584- output. push ( b' ' ) ;
590+ for _ in 0 ..from_byte_padding {
591+ output. push ( b' ' )
592+ }
585593
586- output. extend_from_slice ( format_byte ( to_byte ) . as_bytes ( ) ) ;
594+ output. push ( b' ' ) ;
587595
588- output . push ( b'\n' ) ;
596+ output . extend_from_slice ( format_octal ( to_byte , & mut to_oct ) . as_bytes ( ) ) ;
589597
590- stdout. write_all ( output. as_slice ( ) ) . map_err ( |e| {
591- format ! (
592- "{}: error printing output: {e}" ,
593- params. executable. to_string_lossy( )
594- )
595- } ) ?;
596- }
597- } else {
598- for ( at_byte, from_byte, to_byte) in diffs {
599- output. clear ( ) ;
598+ output. push ( b' ' ) ;
600599
601- // "{:>width$} {:>3o} {:>3o}"
602- let at_byte_str = at_byte_buf. format ( at_byte) ;
603- let at_byte_padding = width - at_byte_str. len ( ) ;
600+ output. extend_from_slice ( format_byte ( to_byte) . as_bytes ( ) ) ;
604601
605- for _ in 0 ..at_byte_padding {
606- output. push ( b' ' )
607- }
602+ output. push ( b'\n' ) ;
603+ } else {
604+ // "{:>width$} {:>3o} {:>3o}"
605+ let at_byte_str = at_byte_buf. format ( at_byte) ;
606+ let at_byte_padding = offset_width - at_byte_str. len ( ) ;
608607
609- output. extend_from_slice ( at_byte_str. as_bytes ( ) ) ;
608+ for _ in 0 ..at_byte_padding {
609+ output. push ( b' ' )
610+ }
610611
611- output. push ( b' ' ) ;
612+ output. extend_from_slice ( at_byte_str . as_bytes ( ) ) ;
612613
613- output. extend_from_slice ( format_octal ( from_byte , & mut from_oct ) . as_bytes ( ) ) ;
614+ output. push ( b' ' ) ;
614615
615- output. push ( b' ' ) ;
616+ output. extend_from_slice ( format_octal ( from_byte , & mut from_oct ) . as_bytes ( ) ) ;
616617
617- output. extend_from_slice ( format_octal ( to_byte , & mut to_oct ) . as_bytes ( ) ) ;
618+ output. push ( b' ' ) ;
618619
619- output . push ( b'\n' ) ;
620+ output . extend_from_slice ( format_octal ( to_byte , & mut to_oct ) . as_bytes ( ) ) ;
620621
621- stdout. write_all ( output. as_slice ( ) ) . map_err ( |e| {
622- format ! (
623- "{}: error printing output: {e}" ,
624- params. executable. to_string_lossy( )
625- )
626- } ) ?;
627- }
628- }
622+ output. push ( b'\n' ) ;
629623 }
630624
631625 Ok ( ( ) )
0 commit comments