diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs index e17a2e03d2dc4..8b713947cd9c7 100644 --- a/library/core/src/slice/ascii.rs +++ b/library/core/src/slice/ascii.rs @@ -60,6 +60,18 @@ impl [u8] { return false; } + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + if self.len() >= 16 { + return self.eq_ignore_ascii_case_chunks(other); + } + + self.eq_ignore_ascii_case_simple(other) + } + + /// ASCII case-insensitive equality check without chunk-at-a-time + /// optimization. + #[inline] + const fn eq_ignore_ascii_case_simple(&self, other: &[u8]) -> bool { // FIXME(const-hack): This implementation can be reverted when // `core::iter::zip` is allowed in const. The original implementation: // self.len() == other.len() && iter::zip(self, other).all(|(a, b)| a.eq_ignore_ascii_case(b)) @@ -78,6 +90,53 @@ impl [u8] { true } + /// Optimized version of `eq_ignore_ascii_case` for byte lengths of at least + /// 16 bytes, which processes chunks at a time. + /// + /// Platforms that have SIMD instructions may benefit from this + /// implementation over `eq_ignore_ascii_case_simple`. + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + #[inline] + const fn eq_ignore_ascii_case_chunks(&self, other: &[u8]) -> bool { + const N: usize = 16; + let (self_chunks, self_rem) = self.as_chunks::(); + let (other_chunks, _) = other.as_chunks::(); + + // Branchless check to encourage auto-vectorization + #[inline(always)] + const fn eq_ignore_ascii_inner(lhs: &[u8; N], rhs: &[u8; N]) -> bool { + let mut equal_ascii = true; + let mut j = 0; + while j < N { + equal_ascii &= lhs[j].eq_ignore_ascii_case(&rhs[j]); + j += 1; + } + + equal_ascii + } + + // Process the chunks, returning early if an inequality is found + let mut i = 0; + while i < self_chunks.len() && i < other_chunks.len() { + if !eq_ignore_ascii_inner(&self_chunks[i], &other_chunks[i]) { + return false; + } + i += 1; + } + + // If there are remaining tails, load the last N bytes in the slices to + // avoid falling back to per-byte checking. + if !self_rem.is_empty() { + if let (Some(a_rem), Some(b_rem)) = (self.last_chunk::(), other.last_chunk::()) { + if !eq_ignore_ascii_inner(a_rem, b_rem) { + return false; + } + } + } + + true + } + /// Converts this slice to its ASCII upper case equivalent in-place. /// /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', diff --git a/library/coretests/benches/str.rs b/library/coretests/benches/str.rs index 2f7d9d56a70b7..bf45a8f0a79bc 100644 --- a/library/coretests/benches/str.rs +++ b/library/coretests/benches/str.rs @@ -5,6 +5,7 @@ use test::{Bencher, black_box}; mod char_count; mod corpora; mod debug; +mod eq_ignore_ascii_case; mod iter; #[bench] diff --git a/library/coretests/benches/str/eq_ignore_ascii_case.rs b/library/coretests/benches/str/eq_ignore_ascii_case.rs new file mode 100644 index 0000000000000..29129b933bc46 --- /dev/null +++ b/library/coretests/benches/str/eq_ignore_ascii_case.rs @@ -0,0 +1,45 @@ +use test::{Bencher, black_box}; + +use super::corpora::*; + +#[bench] +fn bench_str_under_8_bytes_eq(b: &mut Bencher) { + let s = black_box("foo"); + let other = black_box("foo"); + b.iter(|| assert!(s.eq_ignore_ascii_case(other))) +} + +#[bench] +fn bench_str_of_8_bytes_eq(b: &mut Bencher) { + let s = black_box(en::TINY); + let other = black_box(en::TINY); + b.iter(|| assert!(s.eq_ignore_ascii_case(other))) +} + +#[bench] +fn bench_str_17_bytes_eq(b: &mut Bencher) { + let s = black_box(&en::SMALL[..17]); + let other = black_box(&en::SMALL[..17]); + b.iter(|| assert!(s.eq_ignore_ascii_case(other))) +} + +#[bench] +fn bench_str_31_bytes_eq(b: &mut Bencher) { + let s = black_box(&en::SMALL[..31]); + let other = black_box(&en::SMALL[..31]); + b.iter(|| assert!(s.eq_ignore_ascii_case(other))) +} + +#[bench] +fn bench_medium_str_eq(b: &mut Bencher) { + let s = black_box(en::MEDIUM); + let other = black_box(en::MEDIUM); + b.iter(|| assert!(s.eq_ignore_ascii_case(other))) +} + +#[bench] +fn bench_large_str_eq(b: &mut Bencher) { + let s = black_box(en::LARGE); + let other = black_box(en::LARGE); + b.iter(|| assert!(s.eq_ignore_ascii_case(other))) +} diff --git a/tests/codegen-llvm/lib-optimizations/eq_ignore_ascii_case.rs b/tests/codegen-llvm/lib-optimizations/eq_ignore_ascii_case.rs new file mode 100644 index 0000000000000..d4ac5d64585db --- /dev/null +++ b/tests/codegen-llvm/lib-optimizations/eq_ignore_ascii_case.rs @@ -0,0 +1,16 @@ +//@ compile-flags: -Copt-level=3 +//@ only-x86_64 +#![crate_type = "lib"] + +// Ensure that the optimized variant of the function gets auto-vectorized and +// that the inner helper function is inlined. +// CHECK-LABEL: @eq_ignore_ascii_case_autovectorized +#[no_mangle] +pub fn eq_ignore_ascii_case_autovectorized(s: &str, other: &str) -> bool { + // CHECK: load <16 x i8> + // CHECK: load <16 x i8> + // CHECK: bitcast <16 x i1> + // CHECK-NOT: call {{.*}}eq_ignore_ascii_inner + // CHECK-NOT: panic + s.eq_ignore_ascii_case(other) +}