File tree Expand file tree Collapse file tree 3 files changed +6
-13
lines changed
Expand file tree Collapse file tree 3 files changed +6
-13
lines changed Original file line number Diff line number Diff line change @@ -26,10 +26,11 @@ output of uutils `cat` into it. Note that GNU `cat` is slower and therefore less
26 26 suitable, and that if a file is given as its input directly (as in
27 27 `wc -c < largefile`) the first strategy kicks in. Try `uucat somefile | wc -c`.
2828
29- ### Counting lines
29+ ### Counting lines and UTF-8 characters
3030
31- In the case of ` wc -l ` or ` wc -cl ` the input doesn't have to be decoded. It's
32- read in chunks and the ` bytecount ` crate is used to count the newlines.
31+ If the flags set are a subset of `-clm` then the input doesn't have to be decoded. The
32+ input is read in chunks and the `bytecount` crate is used to count the newlines (`-l` flag)
33+ and/or UTF-8 characters (`-m` flag).
3334
34 35 It's useful to vary the line length in the input. GNU wc seems particularly
35 36 bad at short lines.
Original file line number Diff line number Diff line change @@ -19,7 +19,7 @@ path = "src/wc.rs"
1919[dependencies ]
2020clap = { workspace = true }
2121uucore = { workspace = true , features = [" pipes" , " quoting-style" ] }
22- bytecount = { workspace = true }
22+ bytecount = { workspace = true , features = [ " runtime-dispatch-simd " ] }
2323thiserror = { workspace = true }
2424unicode-width = { workspace = true }
2525
Original file line number Diff line number Diff line change @@ -212,11 +212,6 @@ pub(crate) fn count_bytes_chars_and_lines_fast<
212212> (
213213 handle : & mut R ,
214214) -> ( WordCount , Option < io:: Error > ) {
215- /// Mask of the value bits of a continuation byte
216- const CONT_MASK : u8 = 0b0011_1111u8 ;
217- /// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte
218- const TAG_CONT_U8 : u8 = 0b1000_0000u8 ;
219-
220215 let mut total = WordCount :: default ( ) ;
221216 let mut buf = [ 0 ; BUF_SIZE ] ;
222217 loop {
@@ -227,10 +222,7 @@ pub(crate) fn count_bytes_chars_and_lines_fast<
227222 total. bytes += n;
228223 }
229224 if COUNT_CHARS {
230- total. chars += buf[ ..n]
231- . iter ( )
232- . filter ( |& & byte| ( byte & !CONT_MASK ) != TAG_CONT_U8 )
233- . count ( ) ;
225+ total. chars += bytecount:: num_chars ( & buf[ ..n] ) ;
234226 }
235227 if COUNT_LINES {
236228 total. lines += bytecount:: count ( & buf[ ..n] , b'\n' ) ;
You can’t perform that action at this time.
0 commit comments