File tree: 2 files changed, +31 -1 lines changed.
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ mod word_count;
1313use std:: {
1414 borrow:: { Borrow , Cow } ,
1515 cmp:: max,
16+ env,
1617 ffi:: { OsStr , OsString } ,
1718 fs:: { self , File } ,
1819 io:: { self , Write } ,
@@ -578,10 +579,17 @@ fn process_chunk<
578579 text : & str ,
579580 current_len : & mut usize ,
580581 in_word : & mut bool ,
582+ posixly_correct : bool ,
581583) {
582584 for ch in text. chars ( ) {
583585 if SHOW_WORDS {
584- if ch. is_whitespace ( ) {
586+ let is_space = if posixly_correct {
587+ matches ! ( ch, '\t' ..='\r' | ' ' )
588+ } else {
589+ ch. is_whitespace ( )
590+ } ;
591+
592+ if is_space {
585593 * in_word = false ;
586594 } else if !( * in_word) {
587595 // This also counts control characters! (As of GNU coreutils 9.5)
@@ -639,6 +647,7 @@ fn word_count_from_reader_specialized<
639647 let mut reader = BufReadDecoder :: new ( reader. buffered ( ) ) ;
640648 let mut in_word = false ;
641649 let mut current_len = 0 ;
650+ let posixly_correct = env:: var_os ( "POSIXLY_CORRECT" ) . is_some ( ) ;
642651 while let Some ( chunk) = reader. next_strict ( ) {
643652 match chunk {
644653 Ok ( text) => {
@@ -647,6 +656,7 @@ fn word_count_from_reader_specialized<
647656 text,
648657 & mut current_len,
649658 & mut in_word,
659+ posixly_correct,
650660 ) ;
651661 }
652662 Err ( e) => {
Original file line number Diff line number Diff line change @@ -891,3 +891,23 @@ fn test_simd_respects_glibc_tunables() {
891891 ) ;
892892 }
893893}
894+
/// Checks that `wc -w` changes its notion of a word separator when
/// `POSIXLY_CORRECT` is present in the environment.
///
/// The input joins two words with U+00A0 (no-break space) and contains no ASCII
/// whitespace at all, so the reported word count depends solely on whether that
/// one character is classified as whitespace.
#[test]
fn test_posixly_correct_whitespace() {
    // U+00A0 must be the only potential separator; an ASCII space anywhere in
    // the input would split the words in both modes and defeat the test.
    let input = "word\u{00A0}word";

    // Default: any Unicode whitespace ends a word, so two words are counted.
    new_ucmd!()
        .arg("-w")
        .pipe_in(input)
        .succeeds()
        .stdout_is("2\n");

    // POSIXLY_CORRECT set: only ASCII whitespace ('\t'..='\r' and ' ') ends a
    // word, so the no-break space is part of a single word.
    new_ucmd!()
        .arg("-w")
        .env("POSIXLY_CORRECT", "1")
        .pipe_in(input)
        .succeeds()
        .stdout_is("1\n");
}
You can’t perform that action at this time.
0 commit comments