@@ -17,11 +17,11 @@ extern "C" {
1717 #[ link_name = "llvm.x86.sha256rnds2" ]
1818 fn sha256rnds2 ( a : i32x4 , b : i32x4 , k : i32x4 ) -> i32x4 ;
1919 #[ link_name = "llvm.x86.vsha512msg1" ]
20- fn vsha512msg1 ( a : i32x8 , b : i32x4 ) -> i32x8 ;
20+ fn vsha512msg1 ( a : i64x4 , b : i64x2 ) -> i64x4 ;
2121 #[ link_name = "llvm.x86.vsha512msg2" ]
22- fn vsha512msg2 ( a : i32x8 , b : i32x8 ) -> i32x8 ;
22+ fn vsha512msg2 ( a : i64x4 , b : i64x4 ) -> i64x4 ;
2323 #[ link_name = "llvm.x86.vsha512rnds2" ]
24- fn vsha512rnds2_epi64 ( a : i32x8 , b : i32x8 , c : i32x4 ) -> i32x4 ;
24+ fn vsha512rnds2 ( a : i64x4 , b : i64x4 , c : i64x2 ) -> i64x4 ;
2525}
2626
2727#[ cfg( test) ]
@@ -133,7 +133,7 @@ pub unsafe fn _mm_sha256rnds2_epu32(a: __m128i, b: __m128i, k: __m128i) -> __m12
133133#[ cfg_attr( test, assert_instr( vsha512msg1) ) ]
134134#[ unstable( feature = "sha512" , issue = "none" ) ]
135135pub unsafe fn _mm256_sha512msg1_epi64 ( a : __m256i , b : __m128i ) -> __m256i {
136- transmute ( vsha512msg1 ( a. as_i32x8 ( ) , b. as_i32x4 ( ) ) )
136+ transmute ( vsha512msg1 ( a. as_i64x4 ( ) , b. as_i64x2 ( ) ) )
137137}
138138
139139
@@ -145,7 +145,22 @@ pub unsafe fn _mm256_sha512msg1_epi64(a: __m256i, b: __m128i) -> __m256i {
145145#[ cfg_attr( test, assert_instr( vsha512msg2) ) ]
146146#[ unstable( feature = "sha512" , issue = "none" ) ]
147147pub unsafe fn _mm256_sha512msg2_epi64 ( a : __m256i , b : __m256i ) -> __m256i {
148- transmute ( vsha512msg2 ( a. as_i32x8 ( ) , b. as_i32x8 ( ) ) )
148+ transmute ( vsha512msg2 ( a. as_i64x4 ( ) , b. as_i64x4 ( ) ) )
149+ }
150+
151+ /// Performs two rounds of the SHA512 operation. The initial SHA512 state (C,D,G,H) is read
152+ /// from `a`, the initial SHA512 state (A,B,E,F) from `b`, and the pre-computed sum of the
153+ /// next two round message qwords and their corresponding round constants from the two
154+ /// qwords of `c`. The updated SHA512 state (A,B,E,F) is returned, and can be used as the
155+ /// updated state (C,D,G,H) in later rounds.
156+ ///
157+ /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sha512rnds2_epi64)
158+ #[ inline]
159+ #[ target_feature( enable = "sha512,avx" ) ]
160+ #[ cfg_attr( test, assert_instr( vsha512rnds2) ) ]
161+ #[ unstable( feature = "sha512" , issue = "none" ) ]
162+ pub unsafe fn _mm256_sha512rnds2_epi64 ( a : __m256i , b : __m256i , c : __m128i ) -> __m256i {
163+ transmute ( vsha512rnds2 ( a. as_i64x4 ( ) , b. as_i64x4 ( ) , c. as_i64x2 ( ) ) ) // `vsha512rnds2` is declared on 64-bit lanes, so the operands are viewed as i64x4/i64x2 before the call
149164}
150165
151166#[ cfg( test) ]
@@ -255,4 +270,26 @@ mod tests {
255270 let r = _mm256_sha512msg1_epi64 ( a, b) ;
256271 assert_eq_m256i ( r, expected) ;
257272 }
273+
274+ #[ simd_test( enable = "sha512,avx" ) ]
275+ #[ allow( overflowing_literals) ] // the hex constants below are raw 64-bit patterns, several with the top bit set
276+ unsafe fn test_mm256_sha512msg2_epi64 ( ) { // compares `_mm256_sha512msg2_epi64` against a fixed expected vector
277+ let a = _mm256_set_epi64x ( 0xe9b5dba5b5c0fbcf , 0x71374491428a2f98 , 0x0 , 0x0 ) ;
278+ let b = _mm256_set_epi64x ( 0xe9b5dba5b5c0fbcf , 0x71374491428a2f98 , 0x0 , 0x0 ) ;
279+ let expected = _mm256_set_epi64x ( 0xf714b202d863d47d , 0x90c30d946b3d3b35 , 0x0 , 0x0 ) ;
280+ let r = _mm256_sha512msg2_epi64 ( a, b) ;
281+ assert_eq_m256i ( r, expected) ;
282+ }
283+
284+
285+ #[ simd_test( enable = "sha512,avx" ) ]
286+ #[ allow( overflowing_literals) ] // the hex constants below are raw 64-bit patterns, several with the top bit set
287+ unsafe fn test_mm256_sha512rnds2_epi64 ( ) { // compares `_mm256_sha512rnds2_epi64` against a fixed expected vector
288+ let a = _mm256_set_epi64x ( 0xe9b5dba5b5c0fbcf , 0x71374491428a2f98 , 0x0 , 0x0 ) ;
289+ let b = _mm256_set_epi64x ( 0xab1c5ed5923f82a4 , 0x59f111f13956c25b , 0x0 , 0x0 ) ;
290+ let k = _mm_set_epi64x ( 0 , 0x12835b01d807aa98 ) ;
291+ let expected = _mm256_set_epi64x ( 0xd3063037effb15ea , 0x187ee3db0d6d1d19 , 0x0 , 0x0 ) ;
292+ let r = _mm256_sha512rnds2_epi64 ( a, b, k) ;
293+ assert_eq_m256i ( r, expected) ;
294+ }
258295}
0 commit comments