Skip to content

Commit 160de62

Browse files
zakcutner authored and marmeladema committed
Add documentation for AVX2 and memchr searchers
1 parent 8a44553 commit 160de62

File tree

2 files changed

+51
-0
lines changed

2 files changed

+51
-0
lines changed

src/avx2/mod.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ pub use self::{original::*, rust::*};
88
use crate::{bits, memchr::MemchrSearcher, memcmp};
99
use std::{arch::x86_64::*, mem};
1010

11+
/// Rolling hash for the simple Rabin-Karp implementation. As a hashing function, the XOR of all the
12+
/// bytes is computed.
1113
#[derive(Clone, Copy, Default, PartialEq)]
1214
struct ScalarHash(usize);
1315

@@ -33,6 +35,8 @@ impl ScalarHash {
3335
}
3436
}
3537

38+
/// Represents an SIMD register type that is x86-specific (but could be used more generically) in
39+
/// order to share functionality between SSE2, AVX2 and possibly future implementations.
3640
trait Vector: Copy {
3741
unsafe fn set1_epi8(a: i8) -> Self;
3842

@@ -109,6 +113,9 @@ impl Vector for __m256i {
109113
}
110114
}
111115

116+
/// Hash of the first and "last" bytes in the needle for use with the SIMD algorithm implemented by
117+
/// `Avx2Searcher::vector_search_in`. As explained, any byte can be chosen to represent the "last"
118+
/// byte of the hash to prevent worst-case attacks.
112119
struct VectorHash<V: Vector> {
113120
first: V,
114121
last: V,
@@ -126,6 +133,30 @@ impl<V: Vector> VectorHash<V> {
126133

127134
macro_rules! avx2_searcher {
128135
($name:ident, $size:literal, $memcmp:path) => {
136+
/// Single-substring searcher using an AVX2 algorithm based on the "Generic SIMD" algorithm
137+
/// [presented by Wojciech Muła](http://0x80.pl/articles/simd-strfind.html).
138+
///
139+
/// It is similar to the Rabin-Karp algorithm, except that the hash is not rolling and is
140+
/// calculated for several lanes at once. It begins by picking the first byte in the needle
141+
/// and checking at which positions in the haystack it occurs. Any position where it does
142+
/// not can be immediately discounted as a potential match.
143+
///
144+
/// We then repeat this idea with a second byte in the needle (where the haystack is
145+
/// suitably offset) and take a bitwise AND to further limit the possible positions the
146+
/// needle can match in. Any remaining positions are fully evaluated using an equality
147+
/// comparison with the needle.
148+
///
149+
/// Originally, the algorithm always used the last byte for this second byte. Whilst this is
150+
/// often the most efficient option, it is vulnerable to a worst-case attack and so this
151+
/// implementation instead allows any byte (including a random one) to be chosen.
152+
///
153+
/// In the case where the needle is not a multiple of the number of SIMD lanes, the last
154+
/// chunk is made up of a partial overlap with the penultimate chunk to avoid reading random
155+
/// memory, differing from the original implementation. In this case, a mask is used to
156+
/// prevent performing an equality comparison on the same position twice.
157+
///
158+
/// When the haystack is too short for an AVX2 register, a similar SSE2 fallback is used
159+
/// instead. Finally, for very short haystacks there is a scalar Rabin-Karp implementation.
129160
pub struct $name {
130161
needle: Box<[u8]>,
131162
position: usize,
@@ -135,12 +166,15 @@ macro_rules! avx2_searcher {
135166
}
136167

137168
impl $name {
169+
/// Creates a new searcher for `needle`. By default, `position` is set to the last
170+
/// byte in the needle.
138171
#[target_feature(enable = "avx2")]
139172
pub unsafe fn new(needle: Box<[u8]>) -> Self {
140173
let position = needle.len() - 1;
141174
Self::with_position(needle, position)
142175
}
143176

177+
/// Same as `new` but allows additionally specifying the `position` to use.
144178
#[target_feature(enable = "avx2")]
145179
pub unsafe fn with_position(needle: Box<[u8]>, position: usize) -> Self {
146180
assert!(!needle.is_empty());
@@ -270,6 +304,7 @@ macro_rules! avx2_searcher {
270304
self.vector_search_in(haystack, &self.avx2_hash, Self::sse2_search_in)
271305
}
272306

307+
/// Inlined version of `search_in` for hot call sites.
273308
#[inline]
274309
#[target_feature(enable = "avx2")]
275310
pub unsafe fn inlined_search_in(&self, haystack: &[u8]) -> bool {
@@ -280,6 +315,7 @@ macro_rules! avx2_searcher {
280315
self.avx2_search_in(haystack)
281316
}
282317

318+
/// Performs a substring search for the `needle` within `haystack`.
283319
#[inline]
284320
pub fn search_in(&self, haystack: &[u8]) -> bool {
285321
unsafe { self.inlined_search_in(haystack) }
@@ -302,6 +338,12 @@ avx2_searcher!(Avx2Searcher11, 11, memcmp::memcmp10);
302338
avx2_searcher!(Avx2Searcher12, 12, memcmp::memcmp11);
303339
avx2_searcher!(Avx2Searcher13, 13, memcmp::memcmp12);
304340

341+
/// Single-substring searcher based on `Avx2Searcher` but with dynamic algorithm selection.
342+
///
343+
/// It has specialized cases for zero-length needles, which are found in all haystacks, and
344+
/// one-length needles, which use `MemchrSearcher`. For needles up to a length of thirteen it uses
345+
/// specialized versions of `Avx2Searcher`, finally falling back to the generic version of
346+
/// `Avx2Searcher` for longer needles.
305347
pub enum DynamicAvx2Searcher {
306348
N0,
307349
N1(MemchrSearcher),
@@ -321,12 +363,15 @@ pub enum DynamicAvx2Searcher {
321363
}
322364

323365
impl DynamicAvx2Searcher {
366+
/// Creates a new searcher for `needle`. By default, `position` is set to the last byte in
367+
/// the needle.
324368
#[target_feature(enable = "avx2")]
325369
pub unsafe fn new(needle: Box<[u8]>) -> Self {
326370
let position = needle.len() - 1;
327371
Self::with_position(needle, position)
328372
}
329373

374+
/// Same as `new` but allows additionally specifying the `position` to use.
330375
#[target_feature(enable = "avx2")]
331376
pub unsafe fn with_position(needle: Box<[u8]>, position: usize) -> Self {
332377
assert!(!needle.is_empty());
@@ -351,6 +396,7 @@ impl DynamicAvx2Searcher {
351396
}
352397
}
353398

399+
/// Inlined version of `search_in` for hot call sites.
354400
#[inline]
355401
#[target_feature(enable = "avx2")]
356402
pub unsafe fn inlined_search_in(&self, haystack: &[u8]) -> bool {
@@ -373,6 +419,7 @@ impl DynamicAvx2Searcher {
373419
}
374420
}
375421

422+
/// Performs a substring search for the `needle` within `haystack`.
376423
#[inline]
377424
pub fn search_in(&self, haystack: &[u8]) -> bool {
378425
unsafe { self.inlined_search_in(haystack) }

src/memchr.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
use memchr::memchr;
22

3+
/// Single-byte searcher using `memchr` for faster matching.
34
pub struct MemchrSearcher(u8);
45

56
impl MemchrSearcher {
7+
/// Creates a new searcher for `needle`.
68
pub fn new(needle: u8) -> Self {
79
Self(needle)
810
}
911

12+
/// Inlined version of `search_in` for hot call sites.
1013
#[inline]
1114
pub fn inlined_search_in(&self, haystack: &[u8]) -> bool {
1215
if haystack.is_empty() {
@@ -16,6 +19,7 @@ impl MemchrSearcher {
1619
memchr(self.0, haystack).is_some()
1720
}
1821

22+
/// Performs a substring search for the `needle` within `haystack`.
1923
pub fn search_in(&self, haystack: &[u8]) -> bool {
2024
self.inlined_search_in(haystack)
2125
}

0 commit comments

Comments
 (0)