@@ -28,6 +28,19 @@ trait PackedCompareControl {
2828 fn needle_len ( & self ) -> i32 ;
2929}
3030
31+ #[ inline]
32+ #[ target_feature( enable = "sse4.2" ) ]
33+ unsafe fn find_small < C , const CONTROL_BYTE : i32 > ( packed : PackedCompare < C , CONTROL_BYTE > , haystack : & [ u8 ] ) -> Option < usize >
34+ where
35+ C : PackedCompareControl ,
36+ {
37+ let mut tail = [ 0u8 ; 16 ] ;
38+ core:: ptr:: copy_nonoverlapping ( haystack. as_ptr ( ) , tail. as_mut_ptr ( ) , haystack. len ( ) ) ;
39+ let haystack = & tail[ ..haystack. len ( ) ] ;
40+ debug_assert ! ( haystack. len( ) < :: std:: i32 :: MAX as usize ) ;
41+ packed. cmpestri ( haystack. as_ptr ( ) , haystack. len ( ) as i32 )
42+ }
43+
3144/// The PCMPxSTRx instructions always read 16 bytes worth of
3245/// data. Although the instructions handle unaligned memory access
3346/// just fine, they might attempt to read off the end of a page
4962 return None ;
5063 }
5164
65+ if haystack. len ( ) < 16 {
66+ return find_small ( packed, haystack) ;
67+ }
68+
5269 let mut offset = 0 ;
5370
5471 if let Some ( misaligned) = Misalignment :: new ( haystack) {
@@ -89,12 +106,7 @@ where
89106 return None ;
90107 }
91108
92- // By this point, the haystack's length must be less than 16
93- // bytes. It is thus reasonable to truncate it into an i32.
94- debug_assert ! ( haystack. len( ) < :: std:: i32 :: MAX as usize ) ;
95- packed
96- . cmpestri ( haystack. as_ptr ( ) , haystack. len ( ) as i32 )
97- . map ( |loc| offset + loc)
109+ find_small ( packed, haystack) . map ( |loc| loc + offset)
98110}
99111
/// Tuple-struct wrapper around a single value of type `T`, tagged at compile
/// time with the const generic `CONTROL_BYTE`.
///
/// `find_small` accepts it with `T: PackedCompareControl` and calls its
/// `cmpestri` method with a haystack pointer and length.
// NOTE(review): `CONTROL_BYTE` is presumably the control immediate passed to
// the PCMPxSTRx instruction — confirm against the `impl` block.
100112struct PackedCompare < T , const CONTROL_BYTE : i32 > ( T ) ;
0 commit comments