@@ -104,176 +104,6 @@ pub unsafe fn strstr_avx2_rust(haystack: &[u8], needle: &[u8]) -> bool {
104104 }
105105}
106106
/// Struct-based counterpart of the `strstr_avx2_rust` routine.
///
/// Everything that depends only on the needle is computed once by the
/// constructors and then reused for every haystack passed to `search_in`.
pub struct StrStrAVX2Searcher {
    /// Owned copy of the bytes being searched for.
    needle: Box<[u8]>,
    /// Index into `needle` of the second byte checked by the vectorised
    /// pre-filter (the first byte checked is always `needle[0]`).
    position: usize,
    /// `needle[0]` replicated into every byte lane of a 128-bit register.
    sse_first: __m128i,
    /// `needle[position]` replicated into every byte lane of a 128-bit register.
    sse_last: __m128i,
    /// `needle[0]` replicated into every byte lane of a 256-bit register.
    avx2_first: __m256i,
    /// `needle[position]` replicated into every byte lane of a 256-bit register.
    avx2_last: __m256i,
    /// Sum of all needle bytes, compared against the rolling window sum in the
    /// scalar fallback before doing a full byte comparison.
    needle_sum: usize,
}
117-
118- impl StrStrAVX2Searcher {
119- /// Creates a new searcher for `needle`. By default, `position` is set to the last character in
120- /// the needle.
121- #[ target_feature( enable = "avx2" ) ]
122- pub unsafe fn new ( needle : & [ u8 ] ) -> Self {
123- Self :: with_position ( needle, needle. len ( ) - 1 )
124- }
125-
126- /// Same as `new` but allows additionally specifying the `position` to use.
127- #[ target_feature( enable = "avx2" ) ]
128- pub unsafe fn with_position ( needle : & [ u8 ] , position : usize ) -> Self {
129- let mut needle_sum = 0_usize ;
130- for & c in needle {
131- needle_sum += c as usize ;
132- }
133- StrStrAVX2Searcher {
134- needle : needle. to_vec ( ) . into_boxed_slice ( ) ,
135- position,
136- sse_first : _mm_set1_epi8 ( needle[ 0 ] as i8 ) ,
137- sse_last : _mm_set1_epi8 ( needle[ position] as i8 ) ,
138- avx2_first : _mm256_set1_epi8 ( needle[ 0 ] as i8 ) ,
139- avx2_last : _mm256_set1_epi8 ( needle[ position] as i8 ) ,
140- needle_sum,
141- }
142- }
143-
144- /// Performs a substring search for the `needle` within `haystack`.
145- #[ target_feature( enable = "avx2" ) ]
146- pub unsafe fn search_in ( & self , haystack : & [ u8 ] ) -> bool {
147- if haystack. len ( ) < self . needle . len ( ) {
148- return false ;
149- }
150- match self . needle . len ( ) {
151- 0 => true ,
152- 1 => memchr:: memchr ( self . needle [ 0 ] , haystack) . is_some ( ) ,
153- 2 => self . avx2_memcmp ( haystack, memcmp1) ,
154- 3 => self . avx2_memcmp ( haystack, memcmp2) ,
155- 4 => self . avx2_memcmp ( haystack, memcmp3) ,
156- 5 => self . avx2_memcmp ( haystack, memcmp4) ,
157- 6 => self . avx2_memcmp ( haystack, memcmp5) ,
158- 7 => self . avx2_memcmp ( haystack, memcmp6) ,
159- 8 => self . avx2_memcmp ( haystack, memcmp7) ,
160- 9 => self . avx2_memcmp ( haystack, memcmp8) ,
161- 10 => self . avx2_memcmp ( haystack, memcmp9) ,
162- 11 => self . avx2_memcmp ( haystack, memcmp10) ,
163- 12 => self . avx2_memcmp ( haystack, memcmp11) ,
164- 13 => self . avx2_memcmp ( haystack, memcmp12) ,
165- _ => self . avx2_memcmp ( haystack, memcmp) ,
166- }
167- }
168-
169- #[ inline]
170- #[ target_feature( enable = "avx2" ) ]
171- unsafe fn sse_memcmp ( & self , haystack : & [ u8 ] , memcmp : unsafe fn ( & [ u8 ] , & [ u8 ] ) -> bool ) -> bool {
172- if haystack. len ( ) < 16 {
173- return self . rabin_karp ( haystack) ;
174- }
175- let mut chunks = haystack[ ..=( haystack. len ( ) - self . needle . len ( ) ) ] . chunks_exact ( 16 ) ;
176- while let Some ( chunk) = chunks. next ( ) {
177- let i = chunk. as_ptr ( ) as usize - haystack. as_ptr ( ) as usize ;
178- let block_first = _mm_loadu_si128 ( chunk. as_ptr ( ) as * const __m128i ) ;
179- let block_last = _mm_loadu_si128 ( chunk. as_ptr ( ) . add ( self . position ) as * const __m128i ) ;
180-
181- let eq_first = _mm_cmpeq_epi8 ( self . sse_first , block_first) ;
182- let eq_last = _mm_cmpeq_epi8 ( self . sse_last , block_last) ;
183-
184- let mut mask = _mm_movemask_epi8 ( _mm_and_si128 ( eq_first, eq_last) ) as u32 ;
185- while mask != 0 {
186- let bitpos = mask. trailing_zeros ( ) as usize ;
187- let startpos = i + bitpos;
188- if startpos + self . needle . len ( ) <= haystack. len ( )
189- && memcmp (
190- & haystack[ ( startpos + 1 ) ..( startpos + self . needle . len ( ) ) ] ,
191- & self . needle [ 1 ..self . needle . len ( ) ] ,
192- )
193- {
194- return true ;
195- }
196- mask = clear_leftmost_set ( mask) ;
197- }
198- }
199-
200- let chunk = chunks. remainder ( ) ;
201- let i = chunk. as_ptr ( ) as usize - haystack. as_ptr ( ) as usize ;
202- let chunk = & haystack[ i..] ;
203- if !chunk. is_empty ( ) {
204- self . rabin_karp ( chunk)
205- } else {
206- false
207- }
208- }
209-
210- #[ inline]
211- #[ target_feature( enable = "avx2" ) ]
212- unsafe fn avx2_memcmp ( & self , haystack : & [ u8 ] , memcmp : unsafe fn ( & [ u8 ] , & [ u8 ] ) -> bool ) -> bool {
213- if haystack. len ( ) < 32 {
214- return self . sse_memcmp ( haystack, memcmp) ;
215- }
216- let mut chunks = haystack[ ..=( haystack. len ( ) - self . needle . len ( ) ) ] . chunks_exact ( 32 ) ;
217- while let Some ( chunk) = chunks. next ( ) {
218- let i = chunk. as_ptr ( ) as usize - haystack. as_ptr ( ) as usize ;
219- let block_first = _mm256_loadu_si256 ( chunk. as_ptr ( ) as * const __m256i ) ;
220- let block_last =
221- _mm256_loadu_si256 ( chunk. as_ptr ( ) . add ( self . position ) as * const __m256i ) ;
222-
223- let eq_first = _mm256_cmpeq_epi8 ( self . avx2_first , block_first) ;
224- let eq_last = _mm256_cmpeq_epi8 ( self . avx2_last , block_last) ;
225-
226- let mut mask = std:: mem:: transmute :: < i32 , u32 > ( _mm256_movemask_epi8 ( _mm256_and_si256 (
227- eq_first, eq_last,
228- ) ) ) ;
229- while mask != 0 {
230- let bitpos = mask. trailing_zeros ( ) as usize ;
231- let startpos = i + bitpos;
232- if startpos + self . needle . len ( ) <= haystack. len ( )
233- && memcmp (
234- & haystack[ ( startpos + 1 ) ..( startpos + self . needle . len ( ) ) ] ,
235- & self . needle [ 1 ..self . needle . len ( ) ] ,
236- )
237- {
238- return true ;
239- }
240- mask = clear_leftmost_set ( mask) ;
241- }
242- }
243-
244- let chunk = chunks. remainder ( ) ;
245- let i = chunk. as_ptr ( ) as usize - haystack. as_ptr ( ) as usize ;
246- let chunk = & haystack[ i..] ;
247- if !chunk. is_empty ( ) {
248- self . sse_memcmp ( chunk, memcmp)
249- } else {
250- false
251- }
252- }
253-
254- #[ inline]
255- unsafe fn rabin_karp ( & self , haystack : & [ u8 ] ) -> bool {
256- let mut haystack_sum = 0_usize ;
257- for & c in & haystack[ ..self . needle . len ( ) - 1 ] {
258- haystack_sum += c as usize ;
259- }
260-
261- let mut i = self . needle . len ( ) - 1 ;
262- while i < haystack. len ( ) {
263- haystack_sum += * haystack. get_unchecked ( i) as usize ;
264- i += 1 ;
265- if haystack_sum == self . needle_sum
266- && & haystack[ ( i - self . needle . len ( ) ) ..i] == self . needle . as_ref ( )
267- {
268- return true ;
269- }
270- haystack_sum -= * haystack. get_unchecked ( i - self . needle . len ( ) ) as usize ;
271- }
272-
273- false
274- }
275- }
276-
277107#[ cfg( test) ]
278108mod tests {
279109 use super :: strstr_avx2_rust;
0 commit comments