Skip to content

Commit 4c1d9da

Browse files
authored
sha2: fix AVX backend (RustCrypto#345)
1 parent c478cbb commit 4c1d9da

File tree

5 files changed

+49
-33
lines changed

5 files changed

+49
-33
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sha2/CHANGELOG.md

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,25 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8-
## 0.10.0 (2021-12-07)
8+
## 0.10.1 (2022-01-06)
9+
### Fixed
10+
- Bug in the AVX2 backend ([#345])
11+
12+
## 0.10.0 (2021-12-07) [YANKED]
913
### Changed
1014
- Update to `digest` v0.10 ([#217])
1115
- Rename `Sha512Trunc224` and `Sha512Trunc256` to `Sha512_224` and `Sha512_256` respectively. ([#217])
1216

1317
[#217]: https://github.com/RustCrypto/hashes/pull/217
1418

15-
## 0.9.8 (2021-09-09)
19+
## 0.9.9 (2022-01-06)
20+
### Fixed
21+
- Backport [#345] bug fix for the AVX2 backend ([#346])
22+
23+
[#345]: https://github.com/RustCrypto/hashes/pull/345
24+
[#346]: https://github.com/RustCrypto/hashes/pull/346
25+
26+
## 0.9.8 (2021-09-09) [YANKED]
1627
### Fixed
1728
- Bug in the AVX2 backend ([#314])
1829

sha2/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "sha2"
3-
version = "0.10.0" # Also update html_root_url in lib.rs when bumping this
3+
version = "0.10.1" # Also update html_root_url in lib.rs when bumping this
44
description = """
55
Pure Rust implementation of the SHA-2 hash function family
66
including SHA-224, SHA-256, SHA-384, and SHA-512.

sha2/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
#![doc(
4949
html_logo_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg",
5050
html_favicon_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg",
51-
html_root_url = "https://docs.rs/sha2/0.10.0"
51+
html_root_url = "https://docs.rs/sha2/0.10.1"
5252
)]
5353
#![warn(missing_docs, rust_2018_idioms)]
5454

sha2/src/sha512/x86.rs

Lines changed: 33 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[[u8; 128]]
3434
start_block += 1;
3535
}
3636

37-
let mut ms: MsgSchedule = Default::default();
38-
let mut t2: RoundStates = [0u64; SHA512_ROUNDS_NUM];
37+
let mut ms: MsgSchedule = [_mm_setzero_si128(); 8];
38+
let mut t2: RoundStates = [_mm_setzero_si128(); 40];
3939
let mut x = [_mm256_setzero_si256(); 8];
4040

4141
for i in (start_block..blocks.len()).step_by(2) {
@@ -56,7 +56,7 @@ unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[[u8; 128]]
5656

5757
#[inline(always)]
5858
unsafe fn sha512_compress_x86_64_avx(state: &mut [u64; 8], block: &[u8; 128]) {
59-
let mut ms = Default::default();
59+
let mut ms = [_mm_setzero_si128(); 8];
6060
let mut x = [_mm_setzero_si128(); 8];
6161

6262
// Reduced to single iteration
@@ -82,7 +82,7 @@ unsafe fn load_data_avx(x: &mut [__m128i; 8], ms: &mut MsgSchedule, data: *const
8282
_mm_loadu_si128(&K64[2 * $i] as *const u64 as *const _),
8383
);
8484

85-
_mm_store_si128(&mut ms[2 * $i] as *mut u64 as *mut _, y);
85+
ms[$i] = y;
8686
)*};
8787
}
8888

@@ -114,14 +114,8 @@ unsafe fn load_data_avx2(
114114
let t = _mm_loadu_si128(K64.as_ptr().add($i * 2) as *const u64 as *const _);
115115
let y = _mm256_add_epi64(x[$i], _mm256_set_m128i(t, t));
116116

117-
_mm_store_si128(
118-
&mut ms[2 * $i] as *mut u64 as *mut _,
119-
_mm256_extracti128_si256(y, 0),
120-
);
121-
_mm_store_si128(
122-
&mut t2[2 * $i] as *mut u64 as *mut _,
123-
_mm256_extracti128_si256(y, 1),
124-
);
117+
ms[$i] = _mm256_extracti128_si256(y, 0);
118+
t2[$i] = _mm256_extracti128_si256(y, 1);
125119
)*};
126120
}
127121

@@ -137,10 +131,13 @@ unsafe fn rounds_0_63_avx(current_state: &mut State, x: &mut [__m128i; 8], ms: &
137131
let k64 = _mm_loadu_si128(&K64[k64_idx] as *const u64 as *const _);
138132
let y = sha512_update_x_avx(x, k64);
139133

140-
sha_round(current_state, ms[2 * j]);
141-
sha_round(current_state, ms[2 * j + 1]);
134+
{
135+
let ms = cast_ms(ms);
136+
sha_round(current_state, ms[2 * j]);
137+
sha_round(current_state, ms[2 * j + 1]);
138+
}
142139

143-
_mm_store_si128(&mut ms[2 * j] as *const u64 as *mut _, y);
140+
ms[j] = y;
144141
k64_idx += 2;
145142
}
146143
}
@@ -160,17 +157,14 @@ unsafe fn rounds_0_63_avx2(
160157
let t = _mm_loadu_si128(K64.as_ptr().add(k64x4_idx) as *const u64 as *const _);
161158
let y = sha512_update_x_avx2(x, _mm256_set_m128i(t, t));
162159

163-
sha_round(current_state, ms[2 * j]);
164-
sha_round(current_state, ms[2 * j + 1]);
160+
{
161+
let ms = cast_ms(ms);
162+
sha_round(current_state, ms[2 * j]);
163+
sha_round(current_state, ms[2 * j + 1]);
164+
}
165165

166-
_mm_store_si128(
167-
&mut ms[2 * j] as *mut u64 as *mut _,
168-
_mm256_extracti128_si256(y, 0),
169-
);
170-
_mm_store_si128(
171-
&mut t2[(16 * i) + 2 * j] as *mut u64 as *mut _,
172-
_mm256_extracti128_si256(y, 1),
173-
);
166+
ms[j] = _mm256_extracti128_si256(y, 0);
167+
t2[8 * i + j] = _mm256_extracti128_si256(y, 1);
174168

175169
k64x4_idx += 2;
176170
}
@@ -179,14 +173,15 @@ unsafe fn rounds_0_63_avx2(
179173

180174
#[inline(always)]
181175
fn rounds_64_79(current_state: &mut State, ms: &MsgSchedule) {
176+
let ms = cast_ms(ms);
182177
for i in 64..80 {
183178
sha_round(current_state, ms[i & 0xf]);
184179
}
185180
}
186181

187182
#[inline(always)]
188183
fn process_second_block(current_state: &mut State, t2: &RoundStates) {
189-
for t2 in t2.iter() {
184+
for t2 in cast_rs(t2).iter() {
190185
sha_round(current_state, *t2);
191186
}
192187
}
@@ -341,9 +336,19 @@ fn_sha512_update_x!(sha512_update_x_avx2, __m256i, {
341336
XOR = _mm256_xor_si256,
342337
});
343338

339+
#[inline(always)]
340+
fn cast_ms(ms: &MsgSchedule) -> &[u64; SHA512_BLOCK_WORDS_NUM] {
341+
unsafe { &*(ms as *const MsgSchedule as *const _) }
342+
}
343+
344+
#[inline(always)]
345+
fn cast_rs(rs: &RoundStates) -> &[u64; SHA512_ROUNDS_NUM] {
346+
unsafe { &*(rs as *const RoundStates as *const _) }
347+
}
348+
344349
type State = [u64; SHA512_HASH_WORDS_NUM];
345-
type MsgSchedule = [u64; SHA512_BLOCK_WORDS_NUM];
346-
type RoundStates = [u64; SHA512_ROUNDS_NUM];
350+
type MsgSchedule = [__m128i; SHA512_BLOCK_WORDS_NUM / 2];
351+
type RoundStates = [__m128i; SHA512_ROUNDS_NUM / 2];
347352

348353
const SHA512_BLOCK_BYTE_LEN: usize = 128;
349354
const SHA512_ROUNDS_NUM: usize = 80;

0 commit comments

Comments
 (0)