@@ -36,10 +36,7 @@ pub(crate) fn scrypt_block_mix(input: &[u8], output: &mut [u8]) {
3636
3737 let last_block = & input[ input. len ( ) - 64 ..] ;
3838
39- let mut a = unsafe { vld1q_u32 ( last_block. as_ptr ( ) . cast ( ) ) } ;
40- let mut b = unsafe { vld1q_u32 ( last_block. as_ptr ( ) . add ( 16 ) . cast ( ) ) } ;
41- let mut c = unsafe { vld1q_u32 ( last_block. as_ptr ( ) . add ( 32 ) . cast ( ) ) } ;
42- let mut d = unsafe { vld1q_u32 ( last_block. as_ptr ( ) . add ( 48 ) . cast ( ) ) } ;
39+ let mut x = unsafe { vld1q_u32_x4 ( last_block. as_ptr ( ) . cast ( ) ) } ;
4340
4441 for ( i, chunk) in input. chunks ( 64 ) . enumerate ( ) {
4542 let pos = if i % 2 == 0 {
@@ -49,17 +46,17 @@ pub(crate) fn scrypt_block_mix(input: &[u8], output: &mut [u8]) {
4946 } ;
5047
5148 unsafe {
52- let chunk_a = vld1q_u32 ( chunk. as_ptr ( ) . cast ( ) ) ;
53- let chunk_b = vld1q_u32 ( chunk. as_ptr ( ) . add ( 16 ) . cast ( ) ) ;
54- let chunk_c = vld1q_u32 ( chunk. as_ptr ( ) . add ( 32 ) . cast ( ) ) ;
55- let chunk_d = vld1q_u32 ( chunk. as_ptr ( ) . add ( 48 ) . cast ( ) ) ;
49+ let chunk = vld1q_u32_x4 ( chunk. as_ptr ( ) . cast ( ) ) ;
5650
57- a = veorq_u32 ( a , chunk_a ) ;
58- b = veorq_u32 ( b , chunk_b ) ;
59- c = veorq_u32 ( c , chunk_c ) ;
60- d = veorq_u32 ( d , chunk_d ) ;
51+ x . 0 = veorq_u32 ( x . 0 , chunk . 0 ) ;
52+ x . 1 = veorq_u32 ( x . 1 , chunk . 1 ) ;
53+ x . 2 = veorq_u32 ( x . 2 , chunk . 2 ) ;
54+ x . 3 = veorq_u32 ( x . 3 , chunk . 3 ) ;
6155
62- let saves = [ a, b, c, d] ;
56+ let mut a = x. 0 ;
57+ let mut b = x. 1 ;
58+ let mut c = x. 2 ;
59+ let mut d = x. 3 ;
6360
6461 for _ in 0 ..8 {
6562 b = veorq_u32 ( b, vrol_u32 ! ( vaddq_u32( a, d) , 7 ) ) ;
@@ -74,15 +71,12 @@ pub(crate) fn scrypt_block_mix(input: &[u8], output: &mut [u8]) {
7471 ( b, d) = ( d, b) ;
7572 }
7673
77- a = vaddq_u32 ( a , saves [ 0 ] ) ;
78- b = vaddq_u32 ( b , saves [ 1 ] ) ;
79- c = vaddq_u32 ( c , saves [ 2 ] ) ;
80- d = vaddq_u32 ( d , saves [ 3 ] ) ;
74+ x . 0 = vaddq_u32 ( x . 0 , a ) ;
75+ x . 1 = vaddq_u32 ( x . 1 , b ) ;
76+ x . 2 = vaddq_u32 ( x . 2 , c ) ;
77+ x . 3 = vaddq_u32 ( x . 3 , d ) ;
8178
82- vst1q_u32 ( output. as_mut_ptr ( ) . add ( pos) . cast ( ) , a) ;
83- vst1q_u32 ( output. as_mut_ptr ( ) . add ( pos + 16 ) . cast ( ) , b) ;
84- vst1q_u32 ( output. as_mut_ptr ( ) . add ( pos + 32 ) . cast ( ) , c) ;
85- vst1q_u32 ( output. as_mut_ptr ( ) . add ( pos + 48 ) . cast ( ) , d) ;
79+ vst1q_u32_x4 ( output. as_mut_ptr ( ) . add ( pos) . cast ( ) , x) ;
8680 }
8781 }
8882}
0 commit comments