@@ -69,6 +69,45 @@ trait Vector: Copy {
6969 unsafe fn movemask_epi8 ( a : Self ) -> i32 ;
7070}
7171
72+ #[ derive( Clone , Copy ) ]
73+ #[ repr( transparent) ]
74+ #[ allow( non_camel_case_types) ]
75+ struct __m32i ( __m128i ) ;
76+
77+ impl Vector for __m32i {
78+ const LANES : usize = 4 ;
79+
80+ #[ inline]
81+ #[ target_feature( enable = "avx2" ) ]
82+ unsafe fn set1_epi8 ( a : i8 ) -> Self {
83+ __m32i ( _mm_set1_epi8 ( a) )
84+ }
85+
86+ #[ inline]
87+ #[ target_feature( enable = "avx2" ) ]
88+ unsafe fn loadu_si ( a : * const Self ) -> Self {
89+ __m32i ( _mm_set1_epi32 ( std:: ptr:: read_unaligned ( a as * const i32 ) ) )
90+ }
91+
92+ #[ inline]
93+ #[ target_feature( enable = "avx2" ) ]
94+ unsafe fn cmpeq_epi8 ( a : Self , b : Self ) -> Self {
95+ __m32i ( _mm_cmpeq_epi8 ( a. 0 , b. 0 ) )
96+ }
97+
98+ #[ inline]
99+ #[ target_feature( enable = "avx2" ) ]
100+ unsafe fn and_si ( a : Self , b : Self ) -> Self {
101+ __m32i ( _mm_and_si128 ( a. 0 , b. 0 ) )
102+ }
103+
104+ #[ inline]
105+ #[ target_feature( enable = "avx2" ) ]
106+ unsafe fn movemask_epi8 ( a : Self ) -> i32 {
107+ _mm_movemask_epi8 ( a. 0 ) & 0xF
108+ }
109+ }
110+
72111#[ derive( Clone , Copy ) ]
73112#[ repr( transparent) ]
74113#[ allow( non_camel_case_types) ]
@@ -205,6 +244,16 @@ impl From<&VectorHash<__m128i>> for VectorHash<__m64i> {
205244 }
206245}
207246
247+ impl From < & VectorHash < __m128i > > for VectorHash < __m32i > {
248+ #[ inline]
249+ fn from ( hash : & VectorHash < __m128i > ) -> Self {
250+ Self {
251+ first : __m32i ( hash. first ) ,
252+ last : __m32i ( hash. last ) ,
253+ }
254+ }
255+ }
256+
208257/// Single-substring searcher using an AVX2 algorithm based on the "Generic
209258/// SIMD" algorithm [presented by Wojciech
210259/// Muła](http://0x80.pl/articles/simd-strfind.html).
@@ -403,21 +452,28 @@ impl<N: Needle> Avx2Searcher<N> {
403452
404453 #[ inline]
405454 #[ target_feature( enable = "avx2" ) ]
406- unsafe fn u64_search_in ( & self , haystack : & [ u8 ] ) -> bool {
407- let hash = VectorHash :: < __m64i > :: from ( & self . sse2_hash ) ;
455+ unsafe fn sse2_4_search_in ( & self , haystack : & [ u8 ] ) -> bool {
456+ let hash = VectorHash :: < __m32i > :: from ( & self . sse2_hash ) ;
408457 self . vector_search_in ( haystack, & hash, Self :: scalar_search_in)
409458 }
410459
411460 #[ inline]
412461 #[ target_feature( enable = "avx2" ) ]
413- unsafe fn sse2_search_in ( & self , haystack : & [ u8 ] ) -> bool {
414- self . vector_search_in ( haystack, & self . sse2_hash , Self :: u64_search_in)
462+ unsafe fn sse2_8_search_in ( & self , haystack : & [ u8 ] ) -> bool {
463+ let hash = VectorHash :: < __m64i > :: from ( & self . sse2_hash ) ;
464+ self . vector_search_in ( haystack, & hash, Self :: sse2_4_search_in)
465+ }
466+
467+ #[ inline]
468+ #[ target_feature( enable = "avx2" ) ]
469+ unsafe fn sse2_16_search_in ( & self , haystack : & [ u8 ] ) -> bool {
470+ self . vector_search_in ( haystack, & self . sse2_hash , Self :: sse2_8_search_in)
415471 }
416472
417473 #[ inline]
418474 #[ target_feature( enable = "avx2" ) ]
419475 unsafe fn avx2_search_in ( & self , haystack : & [ u8 ] ) -> bool {
420- self . vector_search_in ( haystack, & self . avx2_hash , Self :: sse2_search_in )
476+ self . vector_search_in ( haystack, & self . avx2_hash , Self :: sse2_16_search_in )
421477 }
422478
423479 /// Inlined version of `search_in` for hot call sites.
0 commit comments