Skip to content

Commit b031c51

Browse files
committed
slice/ascii: Optimize eq_ignore_ascii_case with auto-vectorization
Refactor the current functionality into a helper function. Use `as_chunks` to encourage auto-vectorization in the optimized chunk-processing function. Add a codegen test. Add benches for `eq_ignore_ascii_case`. The optimized function is initially only enabled for x86_64, which has `sse2` as part of its baseline, but none of the code is platform-specific. Other platforms with SIMD instructions may also benefit from this implementation. Performance improvements only manifest for slices of 16 bytes or longer, so the optimized path is gated behind a check that the length is at least 16.
1 parent ff5be13 commit b031c51

File tree

4 files changed

+114
-0
lines changed

4 files changed

+114
-0
lines changed

library/core/src/slice/ascii.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,18 @@ impl [u8] {
6060
return false;
6161
}
6262

63+
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
64+
if self.len() >= 16 {
65+
return self.eq_ignore_ascii_case_chunks(other);
66+
}
67+
68+
self.eq_ignore_ascii_case_simple(other)
69+
}
70+
71+
/// ASCII case-insensitive equality check without chunk-at-a-time
72+
/// optimization.
73+
#[inline]
74+
const fn eq_ignore_ascii_case_simple(&self, other: &[u8]) -> bool {
6375
// FIXME(const-hack): This implementation can be reverted when
6476
// `core::iter::zip` is allowed in const. The original implementation:
6577
// self.len() == other.len() && iter::zip(self, other).all(|(a, b)| a.eq_ignore_ascii_case(b))
@@ -78,6 +90,37 @@ impl [u8] {
7890
true
7991
}
8092

93+
/// Optimized version of `eq_ignore_ascii_case` which processes chunks at a
94+
/// time.
95+
///
96+
/// Platforms that have SIMD instructions may benefit from this
97+
/// implementation over `eq_ignore_ascii_case_simple`.
98+
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
99+
#[inline]
100+
const fn eq_ignore_ascii_case_chunks(&self, other: &[u8]) -> bool {
101+
const N: usize = 16;
102+
let (a, a_rem) = self.as_chunks::<N>();
103+
let (b, b_rem) = other.as_chunks::<N>();
104+
105+
let mut i = 0;
106+
while i < a.len() && i < b.len() {
107+
let mut equal_ascii = true;
108+
let mut j = 0;
109+
while j < N {
110+
equal_ascii &= a[i][j].eq_ignore_ascii_case(&b[i][j]);
111+
j += 1;
112+
}
113+
114+
if !equal_ascii {
115+
return false;
116+
}
117+
118+
i += 1;
119+
}
120+
121+
a_rem.eq_ignore_ascii_case_simple(b_rem)
122+
}
123+
81124
/// Converts this slice to its ASCII upper case equivalent in-place.
82125
///
83126
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',

library/coretests/benches/ascii.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
mod eq_ignore_ascii_case;
12
mod is_ascii;
23

34
// Lower-case ASCII 'a' is the first byte that has its highest bit set
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
use test::Bencher;
2+
3+
#[bench]
4+
fn bench_str_under_8_bytes_eq(b: &mut Bencher) {
5+
let s = "foo";
6+
let other = "FOo";
7+
b.iter(|| {
8+
assert!(s.eq_ignore_ascii_case(other));
9+
})
10+
}
11+
12+
#[bench]
13+
fn bench_str_of_8_bytes_eq(b: &mut Bencher) {
14+
let s = "foobar78";
15+
let other = "FOObAr78";
16+
b.iter(|| {
17+
assert!(s.eq_ignore_ascii_case(other));
18+
})
19+
}
20+
21+
#[bench]
22+
fn bench_str_17_bytes_eq(b: &mut Bencher) {
23+
let s = "performance-criti";
24+
let other = "performANce-cRIti";
25+
b.iter(|| {
26+
assert!(s.eq_ignore_ascii_case(other));
27+
})
28+
}
29+
30+
#[bench]
31+
fn bench_str_31_bytes_eq(b: &mut Bencher) {
32+
let s = "foobarbazquux02foobarbazquux025";
33+
let other = "fooBARbazQuuX02fooBARbazQuuX025";
34+
b.iter(|| {
35+
assert!(s.eq_ignore_ascii_case(other));
36+
})
37+
}
38+
39+
#[bench]
40+
fn bench_long_str_eq(b: &mut Bencher) {
41+
let s = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor \
42+
incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud \
43+
exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute \
44+
irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla \
45+
pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui \
46+
officia deserunt mollit anim id est laborum.";
47+
let other = "Lorem ipsum dolor sit amet, CONSECTETUR adipisicing elit, sed do eiusmod tempor \
48+
incididunt ut labore et dolore MAGNA aliqua. Ut enim ad MINIM veniam, quis nostrud \
49+
exercitation ullamco LABORIS nisi ut aliquip ex ea commodo consequat. Duis aute \
50+
irure dolor in reprehenderit in voluptate velit esse cillum DOLORE eu fugiat nulla \
51+
pariatur. Excepteur sint occaecat CUPIDATAT non proident, sunt in culpa qui \
52+
officia deserunt mollit anim id est laborum.";
53+
b.iter(|| {
54+
assert!(s.eq_ignore_ascii_case(other));
55+
})
56+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
//@ compile-flags: -Copt-level=3
2+
//@ only-x86_64
3+
#![crate_type = "lib"]
4+
5+
// The optimized variant of the comparison must end up auto-vectorized in the
// emitted IR, with no panicking path.
// CHECK-LABEL: @eq_ignore_ascii_case_autovectorized
#[no_mangle]
pub fn eq_ignore_ascii_case_autovectorized(s: &str, other: &str) -> bool {
    // CHECK: load <16 x i8>
    // CHECK: load <16 x i8>
    // CHECK: bitcast <16 x i1>
    // CHECK-NOT: panic
    str::eq_ignore_ascii_case(s, other)
}

0 commit comments

Comments
 (0)