Skip to content

Commit 187d3e5

Browse files
authored
Merge pull request #7495 from karlmcdowall/wc_perf
wc: Perf gains with the bytecount crate.
2 parents 1983b57 + eea6c82 commit 187d3e5

File tree

3 files changed: 6 additions (+6) and 13 deletions (-13).

src/uu/wc/BENCHMARKING.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,11 @@ output of uutils `cat` into it. Note that GNU `cat` is slower and therefore less
2626
suitable, and that if a file is given as its input directly (as in
2727
`wc -c < largefile`) the first strategy kicks in. Try `uucat somefile | wc -c`.
2828

29-
### Counting lines
29+
### Counting lines and UTF-8 characters
3030

31-
In the case of `wc -l` or `wc -cl` the input doesn't have to be decoded. It's
32-
read in chunks and the `bytecount` crate is used to count the newlines.
31+
If the flags set are a subset of `-clm` then the input doesn't have to be decoded. The
32+
input is read in chunks and the `bytecount` crate is used to count the newlines (`-l` flag)
33+
and/or UTF-8 characters (`-m` flag).
3334

3435
It's useful to vary the line length in the input. GNU wc seems particularly
3536
bad at short lines.

src/uu/wc/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ path = "src/wc.rs"
1919
[dependencies]
2020
clap = { workspace = true }
2121
uucore = { workspace = true, features = ["pipes", "quoting-style"] }
22-
bytecount = { workspace = true }
22+
bytecount = { workspace = true, features = ["runtime-dispatch-simd"] }
2323
thiserror = { workspace = true }
2424
unicode-width = { workspace = true }
2525

src/uu/wc/src/count_fast.rs

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -212,11 +212,6 @@ pub(crate) fn count_bytes_chars_and_lines_fast<
212212
>(
213213
handle: &mut R,
214214
) -> (WordCount, Option<io::Error>) {
215-
/// Mask of the value bits of a continuation byte
216-
const CONT_MASK: u8 = 0b0011_1111u8;
217-
/// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte
218-
const TAG_CONT_U8: u8 = 0b1000_0000u8;
219-
220215
let mut total = WordCount::default();
221216
let mut buf = [0; BUF_SIZE];
222217
loop {
@@ -227,10 +222,7 @@ pub(crate) fn count_bytes_chars_and_lines_fast<
227222
total.bytes += n;
228223
}
229224
if COUNT_CHARS {
230-
total.chars += buf[..n]
231-
.iter()
232-
.filter(|&&byte| (byte & !CONT_MASK) != TAG_CONT_U8)
233-
.count();
225+
total.chars += bytecount::num_chars(&buf[..n]);
234226
}
235227
if COUNT_LINES {
236228
total.lines += bytecount::count(&buf[..n], b'\n');

0 commit comments

Comments
 (0)