@@ -34,8 +34,8 @@ unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[[u8; 128]]
34
34
start_block += 1 ;
35
35
}
36
36
37
- let mut ms: MsgSchedule = Default :: default ( ) ;
38
- let mut t2: RoundStates = [ 0u64 ; SHA512_ROUNDS_NUM ] ;
37
+ let mut ms: MsgSchedule = [ _mm_setzero_si128 ( ) ; 8 ] ;
38
+ let mut t2: RoundStates = [ _mm_setzero_si128 ( ) ; 40 ] ;
39
39
let mut x = [ _mm256_setzero_si256 ( ) ; 8 ] ;
40
40
41
41
for i in ( start_block..blocks. len ( ) ) . step_by ( 2 ) {
@@ -56,7 +56,7 @@ unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[[u8; 128]]
56
56
57
57
#[ inline( always) ]
58
58
unsafe fn sha512_compress_x86_64_avx ( state : & mut [ u64 ; 8 ] , block : & [ u8 ; 128 ] ) {
59
- let mut ms = Default :: default ( ) ;
59
+ let mut ms = [ _mm_setzero_si128 ( ) ; 8 ] ;
60
60
let mut x = [ _mm_setzero_si128 ( ) ; 8 ] ;
61
61
62
62
// Reduced to single iteration
@@ -82,7 +82,7 @@ unsafe fn load_data_avx(x: &mut [__m128i; 8], ms: &mut MsgSchedule, data: *const
82
82
_mm_loadu_si128( & K64 [ 2 * $i] as * const u64 as * const _) ,
83
83
) ;
84
84
85
- _mm_store_si128 ( & mut ms[ 2 * $i] as * mut u64 as * mut _ , y ) ;
85
+ ms[ $i] = y ;
86
86
) * } ;
87
87
}
88
88
@@ -114,14 +114,8 @@ unsafe fn load_data_avx2(
114
114
let t = _mm_loadu_si128( K64 . as_ptr( ) . add( $i * 2 ) as * const u64 as * const _) ;
115
115
let y = _mm256_add_epi64( x[ $i] , _mm256_set_m128i( t, t) ) ;
116
116
117
- _mm_store_si128(
118
- & mut ms[ 2 * $i] as * mut u64 as * mut _,
119
- _mm256_extracti128_si256( y, 0 ) ,
120
- ) ;
121
- _mm_store_si128(
122
- & mut t2[ 2 * $i] as * mut u64 as * mut _,
123
- _mm256_extracti128_si256( y, 1 ) ,
124
- ) ;
117
+ ms[ $i] = _mm256_extracti128_si256( y, 0 ) ;
118
+ t2[ $i] = _mm256_extracti128_si256( y, 1 ) ;
125
119
) * } ;
126
120
}
127
121
@@ -137,10 +131,13 @@ unsafe fn rounds_0_63_avx(current_state: &mut State, x: &mut [__m128i; 8], ms: &
137
131
let k64 = _mm_loadu_si128 ( & K64 [ k64_idx] as * const u64 as * const _ ) ;
138
132
let y = sha512_update_x_avx ( x, k64) ;
139
133
140
- sha_round ( current_state, ms[ 2 * j] ) ;
141
- sha_round ( current_state, ms[ 2 * j + 1 ] ) ;
134
+ {
135
+ let ms = cast_ms ( ms) ;
136
+ sha_round ( current_state, ms[ 2 * j] ) ;
137
+ sha_round ( current_state, ms[ 2 * j + 1 ] ) ;
138
+ }
142
139
143
- _mm_store_si128 ( & mut ms[ 2 * j] as * const u64 as * mut _ , y ) ;
140
+ ms[ j] = y ;
144
141
k64_idx += 2 ;
145
142
}
146
143
}
@@ -160,17 +157,14 @@ unsafe fn rounds_0_63_avx2(
160
157
let t = _mm_loadu_si128 ( K64 . as_ptr ( ) . add ( k64x4_idx) as * const u64 as * const _ ) ;
161
158
let y = sha512_update_x_avx2 ( x, _mm256_set_m128i ( t, t) ) ;
162
159
163
- sha_round ( current_state, ms[ 2 * j] ) ;
164
- sha_round ( current_state, ms[ 2 * j + 1 ] ) ;
160
+ {
161
+ let ms = cast_ms ( ms) ;
162
+ sha_round ( current_state, ms[ 2 * j] ) ;
163
+ sha_round ( current_state, ms[ 2 * j + 1 ] ) ;
164
+ }
165
165
166
- _mm_store_si128 (
167
- & mut ms[ 2 * j] as * mut u64 as * mut _ ,
168
- _mm256_extracti128_si256 ( y, 0 ) ,
169
- ) ;
170
- _mm_store_si128 (
171
- & mut t2[ ( 16 * i) + 2 * j] as * mut u64 as * mut _ ,
172
- _mm256_extracti128_si256 ( y, 1 ) ,
173
- ) ;
166
+ ms[ j] = _mm256_extracti128_si256 ( y, 0 ) ;
167
+ t2[ 8 * i + j] = _mm256_extracti128_si256 ( y, 1 ) ;
174
168
175
169
k64x4_idx += 2 ;
176
170
}
@@ -179,14 +173,15 @@ unsafe fn rounds_0_63_avx2(
179
173
180
174
#[ inline( always) ]
181
175
fn rounds_64_79 ( current_state : & mut State , ms : & MsgSchedule ) {
176
+ let ms = cast_ms ( ms) ;
182
177
for i in 64 ..80 {
183
178
sha_round ( current_state, ms[ i & 0xf ] ) ;
184
179
}
185
180
}
186
181
187
182
#[ inline( always) ]
188
183
fn process_second_block ( current_state : & mut State , t2 : & RoundStates ) {
189
- for t2 in t2 . iter ( ) {
184
+ for t2 in cast_rs ( t2 ) . iter ( ) {
190
185
sha_round ( current_state, * t2) ;
191
186
}
192
187
}
@@ -341,9 +336,19 @@ fn_sha512_update_x!(sha512_update_x_avx2, __m256i, {
341
336
XOR = _mm256_xor_si256,
342
337
} ) ;
343
338
339
+ #[ inline( always) ]
340
+ fn cast_ms ( ms : & MsgSchedule ) -> & [ u64 ; SHA512_BLOCK_WORDS_NUM ] {
341
+ unsafe { & * ( ms as * const MsgSchedule as * const _ ) }
342
+ }
343
+
344
+ #[ inline( always) ]
345
+ fn cast_rs ( rs : & RoundStates ) -> & [ u64 ; SHA512_ROUNDS_NUM ] {
346
+ unsafe { & * ( rs as * const RoundStates as * const _ ) }
347
+ }
348
+
344
349
type State = [ u64 ; SHA512_HASH_WORDS_NUM ] ;
345
- type MsgSchedule = [ u64 ; SHA512_BLOCK_WORDS_NUM ] ;
346
- type RoundStates = [ u64 ; SHA512_ROUNDS_NUM ] ;
350
+ type MsgSchedule = [ __m128i ; SHA512_BLOCK_WORDS_NUM / 2 ] ;
351
+ type RoundStates = [ __m128i ; SHA512_ROUNDS_NUM / 2 ] ;
347
352
348
353
const SHA512_BLOCK_BYTE_LEN : usize = 128 ;
349
354
const SHA512_ROUNDS_NUM : usize = 80 ;
0 commit comments