Skip to content

Commit ee84e3f

Browse files
zakcutner authored and marmeladema committed
Overlap vectors for leftover elements
1 parent c5cc8fa commit ee84e3f

File tree

1 file changed

+49
-24
lines changed

1 file changed

+49
-24
lines changed

src/avx2/mod.rs

Lines changed: 49 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -182,46 +182,71 @@ macro_rules! avx2_searcher {
182182
false
183183
}
184184

185+
#[inline(always)]
186+
unsafe fn vector_search_in_chunk<V: Vector>(
187+
&self,
188+
haystack: &[u8],
189+
hash: &VectorHash<V>,
190+
start: *const u8,
191+
mask: i32,
192+
) -> bool {
193+
let first = Vector::loadu_si(start.cast());
194+
let last = Vector::loadu_si(start.add(self.position).cast());
195+
196+
let eq_first = Vector::cmpeq_epi8(hash.first, first);
197+
let eq_last = Vector::cmpeq_epi8(hash.last, last);
198+
199+
let eq = Vector::and_si(eq_first, eq_last);
200+
let mut eq = (Vector::movemask_epi8(eq) & mask) as u32;
201+
202+
let start = start as usize - haystack.as_ptr() as usize;
203+
while eq != 0 {
204+
let chunk = &haystack[start + eq.trailing_zeros() as usize..];
205+
if $memcmp(&chunk[1..self.size()], &self.needle[1..]) {
206+
return true;
207+
}
208+
209+
eq = bits::clear_leftmost_set(eq);
210+
}
211+
212+
false
213+
}
214+
185215
#[inline(always)]
186216
fn vector_search_in<V: Vector>(
187217
&self,
188218
haystack: &[u8],
189219
hash: &VectorHash<V>,
190220
next: fn(&Self, &[u8]) -> bool,
191221
) -> bool {
222+
debug_assert!(self.size() > 0);
223+
debug_assert!(haystack.len() >= self.size());
224+
192225
let lanes = mem::size_of::<V>();
193-
if haystack.len() < lanes {
226+
let end = haystack.len() - self.size() + 1;
227+
228+
if end < lanes {
194229
return next(self, haystack);
195230
}
196231

197-
let mut chunks = haystack[..=haystack.len() - self.size()].chunks_exact(lanes);
232+
let mut chunks = haystack[..end].chunks_exact(lanes);
198233
while let Some(chunk) = chunks.next() {
199-
let start = chunk.as_ptr();
200-
let first = unsafe { Vector::loadu_si(start.cast()) };
201-
let last = unsafe { Vector::loadu_si(start.add(self.position).cast()) };
202-
203-
let mask_first = unsafe { Vector::cmpeq_epi8(hash.first, first) };
204-
let mask_last = unsafe { Vector::cmpeq_epi8(hash.last, last) };
205-
206-
let mask = unsafe { Vector::and_si(mask_first, mask_last) };
207-
let mut mask = unsafe { Vector::movemask_epi8(mask) } as u32;
234+
if unsafe { self.vector_search_in_chunk(haystack, hash, chunk.as_ptr(), -1) } {
235+
return true;
236+
}
237+
}
208238

209-
let start = start as usize - haystack.as_ptr() as usize;
210-
while mask != 0 {
211-
let chunk = &haystack[start + mask.trailing_zeros() as usize..];
212-
if unsafe { $memcmp(&chunk[1..self.size()], &self.needle[1..]) } {
213-
return true;
214-
}
239+
let remainder = chunks.remainder().len();
240+
if remainder > 0 {
241+
let start = unsafe { haystack.as_ptr().add(end - lanes) };
242+
let mask = -1 << (lanes - remainder);
215243

216-
mask = bits::clear_leftmost_set(mask);
244+
if unsafe { self.vector_search_in_chunk(haystack, hash, start, mask) } {
245+
return true;
217246
}
218247
}
219248

220-
let remainder = chunks.remainder();
221-
debug_assert!(remainder.len() < lanes);
222-
223-
let chunk = &haystack[remainder.as_ptr() as usize - haystack.as_ptr() as usize..];
224-
next(self, chunk)
249+
false
225250
}
226251

227252
#[inline(always)]
@@ -405,7 +430,7 @@ mod tests {
405430

406431
assert!(search(
407432
b"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas commodo posuere orci a consectetur. Ut mattis turpis ut auctor consequat. Aliquam iaculis fringilla mi, nec aliquet purus",
408-
b"liquam iaculis fringilla mi, nec aliquet purus"
433+
b"Aliquam iaculis fringilla mi, nec aliquet purus"
409434
));
410435
}
411436

0 commit comments

Comments
 (0)