1- use super :: {
2- mask, Array , ArraySize , False , Gr , Hs1HashKey , Hs1Params , PhantomData , Quot , True , B16 , U4 ,
3- } ;
1+ use super :: { mask, Array , False , Gr , Hs1HashKey , Hs1Params , PhantomData , Quot , True , B16 , U4 } ;
42use aead:: array:: typenum:: Unsigned ;
53use core:: mem;
64
5+ #[ cfg( target_feature = "sse2" ) ]
6+ mod sse2;
7+
78#[ derive( Clone ) ]
89pub struct Hasher < P : Hs1Params > {
910 k : Hs1HashKey < P > ,
@@ -53,41 +54,52 @@ impl<P: Hs1Params> Hasher<P> {
/// Creates a hasher for the key `k`.
///
/// The key is cloned into the hasher, every element of `h` starts at 1,
/// `block` starts from its `Default` value and `bytes` starts at 0.
5354 pub fn new ( k : & Hs1HashKey < P > ) -> Self {
5455 Self {
5556 k : k. clone ( ) ,
    // Pre-change initialisers (`-` lines): `h` was filled from an endless
    // iterator of 1s through the local `array_from_iter` helper.
56- h : array_from_iter ( core :: iter :: repeat ( 1 ) ) ,
57- block : Array :: default ( ) ,
    // Post-change initialisers (`+` lines): the same values, built with
    // `Array::from_fn` and `Default::default()` instead of the helper.
57+ h : Array :: from_fn ( |_| 1 ) ,
58+ block : Default :: default ( ) ,
5859 bytes : 0 ,
5960 _marker : PhantomData ,
6061 }
6162 }
6263
/// Folds the currently buffered block into the running state `h` and returns `self`.
///
/// Visible steps: check the fill level, optionally hand off to the SSE2
/// implementation, accumulate one pair of 64-bit sums per `nh` slot over the
/// buffered 4-word chunks, then combine each pair and feed it through
/// `poly_step` together with the matching `k.poly` element.
/// The statements between `for_each` and the final `self` are hidden by the
/// diff hunk boundary and are not described here.
///
/// # Panics
/// Panics if `bytes` exceeds the length of the byte view of the block.
64+ #[ inline( always) ]
6365 fn update_block ( & mut self ) -> & mut Self {
6466 assert ! ( usize :: from( self . bytes) <= self . block_u8( ) . len( ) ) ;
6567
    // When the crate is compiled with the `sse2` target feature, this branch is
    // compiled in and always returns early through `update_block_sse2`.
    // NOTE(review): the `if true { ... }` wrapper presumably keeps the portable
    // code below from being flagged as unreachable; confirm.
68+ #[ cfg( target_feature = "sse2" ) ]
69+ if true {
70+ // SAFETY: sse2 is supported
71+ unsafe {
72+ return self . update_block_sse2 ( ) ;
73+ }
74+ }
75+
    // One step over four key words and four message words: add them pairwise
    // with wrapping 32-bit addition and widen each sum to u64.
    // The pre-change version (`-` lines) returned `a*c + b*d` as a single wrapped u64.
    // The post-change version (`+` lines) returns the two products separately.
    // Each factor comes from a u32, so a single product cannot overflow u64.
6676 #[ inline( always) ]
67- fn nh_step ( & [ ax, bx, cx, dx] : & [ u32 ; 4 ] , & [ ay, by, cy, dy] : & [ u32 ; 4 ] ) -> u64 {
68- let d = u64:: from ( dx. wrapping_add ( dy) ) ;
69- let c = u64:: from ( cx. wrapping_add ( cy) ) ;
70- let b = u64:: from ( bx. wrapping_add ( by) ) ;
77+ fn nh_step ( & [ ax, bx, cx, dx] : & [ u32 ; 4 ] , & [ ay, by, cy, dy] : & [ u32 ; 4 ] ) -> [ u64 ; 2 ] {
7178 let a = u64:: from ( ax. wrapping_add ( ay) ) ;
72- ( a * c) . wrapping_add ( b * d)
79+ let b = u64:: from ( bx. wrapping_add ( by) ) ;
80+ let c = u64:: from ( cx. wrapping_add ( cy) ) ;
81+ let d = u64:: from ( dx. wrapping_add ( dy) ) ;
82+ [ a * c, b * d]
7383 }
7484
7585 let m_ints = & self . block ;
7686
    // Number of 16-byte groups to process: `bytes` rounded up to a multiple of
    // 16, and at least one group even when `bytes` is 0.
    // NOTE(review): `self.bytes + 15` is computed in the type of `bytes`
    // (apparently a narrow unsigned integer); confirm it cannot overflow for
    // the largest supported block size.
7787 let block16_count = usize:: from ( ( ( self . bytes + 15 ) / 16 ) . max ( 1 ) ) ;
7888
    // One accumulator per output slot. The post-change code keeps the two
    // products of `nh_step` in separate lanes until the final combine.
79- let mut nh = Array :: < u64 , P :: T > :: default ( ) ;
89+ let mut nh = Array :: < [ u64 ; 2 ] , P :: T > :: default ( ) ;
    // For message chunk `i0`, accumulator `j` is paired with key chunk `i0 + j`
    // (the key chunks are skipped by `i0` before zipping).
8090 for ( i0, m_ints_i) in m_ints. chunks_exact ( 4 ) . enumerate ( ) . take ( block16_count) {
81- for ( nh_i , k_n_i_i) in nh. iter_mut ( ) . zip ( self . k . nh . chunks_exact ( 4 ) . skip ( i0) ) {
91+ for ( [ nh_i0 , nh_i1 ] , k_n_i_i) in nh. iter_mut ( ) . zip ( self . k . nh . chunks_exact ( 4 ) . skip ( i0) ) {
    // `chunks_exact(4)` only yields 4-element slices, so these conversions
    // to `&[u32; 4]` cannot fail.
8292 let k_n_i_i = k_n_i_i. try_into ( ) . expect ( "exactly 4 elements" ) ;
8393 let m_ints_i = m_ints_i. try_into ( ) . expect ( "exactly 4 elements" ) ;
84- let s = nh_step ( k_n_i_i, m_ints_i) ;
85- * nh_i = nh_i. wrapping_add ( s) ;
94+ let [ s0, s1] = nh_step ( k_n_i_i, m_ints_i) ;
95+ * nh_i0 = nh_i0. wrapping_add ( s0) ;
96+ * nh_i1 = nh_i1. wrapping_add ( s1) ;
8697 }
8798 }
8899
    // Final combine per slot: add `bytes & mask(4)`, keep the bits selected by
    // `mask(60)`, then advance `h` with `poly_step` using the matching `k.poly` element.
    // The pre-change line used a plain `+`; the post-change lines first merge
    // the two lanes and use `wrapping_add` throughout.
    // NOTE(review): `mask(n)` presumably selects the low `n` bits; confirm
    // against its definition.
89100 nh. iter ( )
90- . map ( |nh_i| ( nh_i + ( u64:: from ( self . bytes ) & mask ( 4 ) ) ) & mask ( 60 ) )
101+ . map ( |& [ ac, bd] | ac. wrapping_add ( bd) )
102+ . map ( |nh_i| ( nh_i. wrapping_add ( u64:: from ( self . bytes ) & mask ( 4 ) ) ) & mask ( 60 ) )
91103 . zip ( self . k . poly . iter ( ) )
92104 . zip ( self . h . iter_mut ( ) )
93105 . for_each ( |( ( a_i, & k_p_i) , h_i) | * h_i = poly_step ( * h_i, a_i, k_p_i) ) ;
@@ -97,6 +109,7 @@ impl<P: Hs1Params> Hasher<P> {
97109 self
98110 }
99111
/// Feeds `bytes` into the hasher and returns `self` so calls can be chained.
///
/// Only the entry check and the final `self` are visible in this diff view;
/// the buffering logic in between is elided by the hunk boundary.
///
/// # Panics
/// Panics if, on entry, `bytes` already buffered is not strictly less than
/// the length of the byte view of the block.
112+ #[ inline( always) ]
100113 pub fn update < ' a > ( & ' a mut self , bytes : & [ u8 ] ) -> & ' a mut Self {
100114 assert ! ( usize :: from( self . bytes) < self . block_u8( ) . len( ) ) ;
    // Current fill level of the block buffer, as a `usize`.
102115 let start = usize:: from ( self . bytes ) ;
@@ -123,6 +136,7 @@ impl<P: Hs1Params> Hasher<P> {
123136 self
124137 }
125138
/// Pads the buffered input with respect to a `1 << bits` boundary and returns `self`.
///
/// NOTE(review): the padding statements themselves are hidden by the diff
/// hunk boundary; the summary above is inferred from the name and the two
/// visible lines, so confirm against the full source.
139+ #[ inline( always) ]
126140 pub ( crate ) fn pad_to ( & mut self , bits : u8 ) -> & mut Self {
    // Debug-only check that `1 << bits` does not exceed `B16::<P>::to_u8()`.
127141 debug_assert ! ( 1 << bits <= B16 :: <P >:: to_u8( ) ) ;
    // `mask(bits)` narrowed to `u8`.
128142 let m = mask ( bits) as u8 ;
@@ -131,6 +145,7 @@ impl<P: Hs1Params> Hasher<P> {
131145 }
132146
133147 // TODO &mut self helps avoid needing to clone(), but might be unintuitive
/// Produces the hash output: one `Output<P>` value per `P::T` slot.
///
/// Takes `&mut self` rather than consuming the hasher (see the TODO above).
/// The computation of `out` is hidden by the diff hunk boundary and is not
/// described here.
148+ #[ inline( always) ]
134149 pub fn finalize ( & mut self ) -> Array < Output < P > , P :: T > {
135150 // TODO we need to handle empty data properly
136151 // However, see the note in crate::test::test_vectors::hash_me_empty
@@ -146,6 +161,7 @@ impl<P: Hs1Params> Hasher<P> {
146161 out
147162 }
148163
164+ #[ inline( always) ]
149165 fn block_u8 ( & mut self ) -> & mut Array < u8 , B16 < P > > {
150166 const {
151167 assert ! (
@@ -177,18 +193,6 @@ const fn poly_finalize(a: u64) -> u64 {
177193 a & c
178194}
179195
// Removed by this change (every line below is a `-` line).
// Helper that built an `Array<I::Item, L>` from an iterator: it started from
// the all-`Default` array and overwrote elements in order with the iterator's
// items. Because `zip` stops at the shorter side, an iterator with fewer than
// `L` items left the remaining elements at their `Default` value, and surplus
// items were ignored.
180- #[ inline( always) ]
181- fn array_from_iter < I , L > ( it : I ) -> Array < I :: Item , L >
182- where
183- I : IntoIterator ,
184- L : ArraySize ,
185- I :: Item : Default ,
186- {
187- let mut v = Array :: < I :: Item , L > :: default ( ) ;
188- v. iter_mut ( ) . zip ( it) . for_each ( |( w, r) | * w = r) ;
189- v
190- }
191-
192196#[ cfg( test) ]
193197mod test {
194198 #[ test]
0 commit comments