Skip to content

Commit 7b88f48

Browse files
committed
Avoid scalar fallback in chunk remainder check
Refactor the eq check into an inner function for reuse in tail checking. Rather than fall back to the simple implementation for tail handling, load the last 16 bytes to take advantage of vectorization. This doesn't seem to negatively impact check time even when the remainder count is low.
1 parent b031c51 commit 7b88f48

File tree

1 file changed

+25
-10
lines changed

1 file changed

+25
-10
lines changed

library/core/src/slice/ascii.rs

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -90,35 +90,50 @@ impl [u8] {
9090
true
9191
}
9292

93-
/// Optimized version of `eq_ignore_ascii_case` which processes chunks at a
94-
/// time.
93+
/// Optimized version of `eq_ignore_ascii_case` for byte lengths of at least
94+
/// 16 bytes, which processes chunks at a time.
9595
///
9696
/// Platforms that have SIMD instructions may benefit from this
9797
/// implementation over `eq_ignore_ascii_case_simple`.
9898
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
9999
#[inline]
100100
const fn eq_ignore_ascii_case_chunks(&self, other: &[u8]) -> bool {
101101
const N: usize = 16;
102-
let (a, a_rem) = self.as_chunks::<N>();
103-
let (b, b_rem) = other.as_chunks::<N>();
102+
let (self_chunks, self_rem) = self.as_chunks::<N>();
103+
let (other_chunks, _) = other.as_chunks::<N>();
104104

105-
let mut i = 0;
106-
while i < a.len() && i < b.len() {
105+
// Branchless check to encourage auto-vectorization
106+
const fn eq_ignore_ascii_inner(lhs: &[u8; N], rhs: &[u8; N]) -> bool {
107107
let mut equal_ascii = true;
108108
let mut j = 0;
109109
while j < N {
110-
equal_ascii &= a[i][j].eq_ignore_ascii_case(&b[i][j]);
110+
equal_ascii &= lhs[j].eq_ignore_ascii_case(&rhs[j]);
111111
j += 1;
112112
}
113113

114-
if !equal_ascii {
114+
equal_ascii
115+
}
116+
117+
// Process the chunks, returning early if an inequality is found
118+
let mut i = 0;
119+
while i < self_chunks.len() && i < other_chunks.len() {
120+
if !eq_ignore_ascii_inner(&self_chunks[i], &other_chunks[i]) {
115121
return false;
116122
}
117-
118123
i += 1;
119124
}
120125

121-
a_rem.eq_ignore_ascii_case_simple(b_rem)
126+
// If there are remaining tails, load the last N bytes in the slices to
127+
// avoid falling back to per-byte checking.
128+
if !self_rem.is_empty() {
129+
if let (Some(a_rem), Some(b_rem)) = (self.last_chunk::<N>(), other.last_chunk::<N>()) {
130+
if !eq_ignore_ascii_inner(a_rem, b_rem) {
131+
return false;
132+
}
133+
}
134+
}
135+
136+
true
122137
}
123138

124139
/// Converts this slice to its ASCII upper case equivalent in-place.

0 commit comments

Comments
 (0)