@@ -349,7 +349,62 @@ enum CharType {
349349 Other ,
350350}
351351
352- #[ allow( clippy:: cognitive_complexity) ]
352+ /// Classify a character and determine its width and byte length.
353+ ///
354+ /// Returns `(CharType, display_width, byte_length)`.
355+ #[ inline]
356+ fn classify_char ( buf : & [ u8 ] , byte : usize , uflag : bool ) -> ( CharType , usize , usize ) {
357+ use self :: CharType :: { Backspace , Other , Tab } ;
358+
359+ if uflag {
360+ let nbytes = char:: from ( buf[ byte] ) . len_utf8 ( ) ;
361+
362+ if byte + nbytes > buf. len ( ) {
363+ // don't overrun buffer because of invalid UTF-8
364+ return ( Other , 1 , 1 ) ;
365+ }
366+
367+ if let Ok ( t) = from_utf8 ( & buf[ byte..byte + nbytes] ) {
368+ match t. chars ( ) . next ( ) {
369+ Some ( '\t' ) => ( Tab , 0 , nbytes) ,
370+ Some ( '\x08' ) => ( Backspace , 0 , nbytes) ,
371+ Some ( c) => ( Other , UnicodeWidthChar :: width ( c) . unwrap_or ( 0 ) , nbytes) ,
372+ None => {
373+ // no valid char at start of t, so take 1 byte
374+ ( Other , 1 , 1 )
375+ }
376+ }
377+ } else {
378+ ( Other , 1 , 1 ) // implicit assumption: non-UTF-8 char is 1 col wide
379+ }
380+ } else {
381+ (
382+ match buf. get ( byte) {
383+ // always take exactly 1 byte in strict ASCII mode
384+ Some ( 0x09 ) => Tab ,
385+ Some ( 0x08 ) => Backspace ,
386+ _ => Other ,
387+ } ,
388+ 1 ,
389+ 1 ,
390+ )
391+ }
392+ }
393+
/// Emit the padding that replaces a tab.
///
/// If `tspaces` holds at least `nts` bytes, its first `nts` bytes are written
/// as-is; otherwise a fresh run of `nts` ASCII spaces is built and written.
/// Any error from the underlying writer is returned to the caller.
///
/// NOTE(review): presumably `tspaces` is a pre-built run of spaces — confirm
/// against the caller.
#[inline]
fn write_tab_spaces(
    output: &mut BufWriter<std::io::Stdout>,
    nts: usize,
    tspaces: &str,
) -> std::io::Result<()> {
    match tspaces.as_bytes().get(..nts) {
        // enough bytes available: reuse them without allocating
        Some(prefix) => output.write_all(prefix),
        None => {
            let padding = " ".repeat(nts);
            output.write_all(padding.as_bytes())
        }
    }
}
407+
353408fn expand_line (
354409 buf : & mut Vec < u8 > ,
355410 output : & mut BufWriter < std:: io:: Stdout > ,
@@ -372,37 +427,7 @@ fn expand_line(
372427 let mut init = true ;
373428
374429 while byte < buf. len ( ) {
375- let ( ctype, cwidth, nbytes) = if options. uflag {
376- let nbytes = char:: from ( buf[ byte] ) . len_utf8 ( ) ;
377-
378- if byte + nbytes > buf. len ( ) {
379- // don't overrun buffer because of invalid UTF-8
380- ( Other , 1 , 1 )
381- } else if let Ok ( t) = from_utf8 ( & buf[ byte..byte + nbytes] ) {
382- match t. chars ( ) . next ( ) {
383- Some ( '\t' ) => ( Tab , 0 , nbytes) ,
384- Some ( '\x08' ) => ( Backspace , 0 , nbytes) ,
385- Some ( c) => ( Other , UnicodeWidthChar :: width ( c) . unwrap_or ( 0 ) , nbytes) ,
386- None => {
387- // no valid char at start of t, so take 1 byte
388- ( Other , 1 , 1 )
389- }
390- }
391- } else {
392- ( Other , 1 , 1 ) // implicit assumption: non-UTF-8 char is 1 col wide
393- }
394- } else {
395- (
396- match buf. get ( byte) {
397- // always take exactly 1 byte in strict ASCII mode
398- Some ( 0x09 ) => Tab ,
399- Some ( 0x08 ) => Backspace ,
400- _ => Other ,
401- } ,
402- 1 ,
403- 1 ,
404- )
405- } ;
430+ let ( ctype, cwidth, nbytes) = classify_char ( buf, byte, options. uflag ) ;
406431
407432 // figure out how many columns this char takes up
408433 match ctype {
@@ -413,23 +438,24 @@ fn expand_line(
413438
414439 // now dump out either spaces if we're expanding, or a literal tab if we're not
415440 if init || !options. iflag {
416- if nts <= options. tspaces . len ( ) {
417- output. write_all ( & options. tspaces . as_bytes ( ) [ ..nts] ) ?;
418- } else {
419- output. write_all ( " " . repeat ( nts) . as_bytes ( ) ) ?;
420- }
441+ write_tab_spaces ( output, nts, & options. tspaces ) ?;
421442 } else {
422443 output. write_all ( & buf[ byte..byte + nbytes] ) ?;
423444 }
424445 }
425- _ => {
426- col = if ctype == Other {
427- col + cwidth
428- } else if col > 0 {
429- col - 1
430- } else {
431- 0
432- } ;
446+ Backspace => {
447+ col = col. saturating_sub ( 1 ) ;
448+
449+ // if we're writing anything other than a space, then we're
450+ // done with the line's leading spaces
451+ if buf[ byte] != 0x20 {
452+ init = false ;
453+ }
454+
455+ output. write_all ( & buf[ byte..byte + nbytes] ) ?;
456+ }
457+ Other => {
458+ col += cwidth;
433459
434460 // if we're writing anything other than a space, then we're
435461 // done with the line's leading spaces
0 commit comments