Skip to content

Commit 7220730

Browse files
committed
groestl: add AVX-512/GFNI backend
1 parent 1376212 commit 7220730

File tree

8 files changed

+380
-96
lines changed

8 files changed

+380
-96
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

groestl/Cargo.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ categories = ["cryptography", "no-std"]
1414

1515
[dependencies]
1616
digest = "0.11.0-rc.0"
17+
cfg-if = "1"
18+
19+
[target.'cfg(any(target_arch = "x86_64", target_arch = "x86"))'.dependencies]
20+
cpufeatures = "0.2.12"
1721

1822
[dev-dependencies]
1923
digest = { version = "0.11.0-rc.0", features = ["dev"] }
@@ -25,5 +29,9 @@ default = ["alloc"]
2529
alloc = ["digest/alloc"]
2630
zeroize = ["digest/zeroize"]
2731

32+
[lints.rust.unexpected_cfgs]
33+
level = "warn"
34+
check-cfg = ["cfg(groestl_force_soft)"]
35+
2836
[package.metadata.docs.rs]
2937
all-features = true

groestl/src/block_api.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use core::fmt;
1+
use core::{fmt, slice};
22
use digest::{
33
HashMarker, InvalidOutputSize, Output,
44
block_api::{
@@ -36,9 +36,7 @@ macro_rules! impl_variant {
3636
#[inline]
3737
fn update_blocks(&mut self, blocks: &[Block<Self>]) {
3838
self.blocks_len += blocks.len() as u64;
39-
for block in blocks {
40-
$compress::compress(&mut self.state, block.as_ref());
41-
}
39+
$compress::compress(&mut self.state, Block::<Self>::cast_slice_to_core(blocks));
4240
}
4341
}
4442

@@ -72,7 +70,7 @@ macro_rules! impl_variant {
7270
self.blocks_len + 1
7371
};
7472
buffer.len64_padding_be(blocks_len, |block| {
75-
$compress::compress(&mut self.state, block.as_ref())
73+
$compress::compress(&mut self.state, slice::from_ref(block.as_ref()))
7674
});
7775
let res = $compress::p(&self.state);
7876
let n = $compress::COLS / 2;

groestl/src/compress_long.rs

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -65,23 +65,25 @@ fn rndp(mut x: [u64; COLS], r: u64) -> [u64; COLS] {
6565
]
6666
}
6767

68-
pub(crate) fn compress(h: &mut [u64; COLS], block: &[u8; 128]) {
69-
let mut q = [0u64; COLS];
70-
for (chunk, v) in block.chunks_exact(8).zip(q.iter_mut()) {
71-
*v = u64::from_be_bytes(chunk.try_into().unwrap());
72-
}
73-
let mut p = [0u64; COLS];
74-
for i in 0..COLS {
75-
p[i] = h[i] ^ q[i];
76-
}
77-
for i in 0..ROUNDS {
78-
q = rndq(q, i);
79-
}
80-
for i in 0..ROUNDS {
81-
p = rndp(p, i << 56);
82-
}
83-
for i in 0..COLS {
84-
h[i] ^= q[i] ^ p[i];
68+
pub(crate) fn compress(h: &mut [u64; COLS], blocks: &[[u8; 128]]) {
69+
for block in blocks {
70+
let mut q = [0u64; COLS];
71+
for (chunk, v) in block.chunks_exact(8).zip(q.iter_mut()) {
72+
*v = u64::from_be_bytes(chunk.try_into().unwrap());
73+
}
74+
let mut p = [0u64; COLS];
75+
for i in 0..COLS {
76+
p[i] = h[i] ^ q[i];
77+
}
78+
for i in 0..ROUNDS {
79+
q = rndq(q, i);
80+
}
81+
for i in 0..ROUNDS {
82+
p = rndp(p, i << 56);
83+
}
84+
for i in 0..COLS {
85+
h[i] ^= q[i] ^ p[i];
86+
}
8587
}
8688
}
8789

groestl/src/compress_short.rs

Lines changed: 25 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,81 +1,33 @@
1-
#![allow(clippy::needless_range_loop)]
2-
use crate::table::TABLE;
3-
41
pub(crate) const COLS: usize = 8;
5-
const ROUNDS: u64 = 10;
62

7-
#[inline(always)]
8-
fn column(x: &[u64; COLS], c: [usize; 8]) -> u64 {
9-
let mut t = 0;
10-
for i in 0..8 {
11-
let sl = 8 * (7 - i);
12-
let idx = ((x[c[i]] >> sl) & 0xFF) as usize;
13-
t ^= TABLE[i][idx];
14-
}
15-
t
16-
}
3+
mod soft;
174

18-
#[inline(always)]
19-
fn rndq(mut x: [u64; COLS], r: u64) -> [u64; COLS] {
20-
for i in 0..COLS {
21-
x[i] ^= u64::MAX.wrapping_sub((i as u64) << 4) ^ r;
22-
}
23-
[
24-
column(&x, [1, 3, 5, 7, 0, 2, 4, 6]),
25-
column(&x, [2, 4, 6, 0, 1, 3, 5, 7]),
26-
column(&x, [3, 5, 7, 1, 2, 4, 6, 0]),
27-
column(&x, [4, 6, 0, 2, 3, 5, 7, 1]),
28-
column(&x, [5, 7, 1, 3, 4, 6, 0, 2]),
29-
column(&x, [6, 0, 2, 4, 5, 7, 1, 3]),
30-
column(&x, [7, 1, 3, 5, 6, 0, 2, 4]),
31-
column(&x, [0, 2, 4, 6, 7, 1, 3, 5]),
32-
]
33-
}
5+
cfg_if::cfg_if! {
6+
if #[cfg(any(not(any(target_arch = "x86_64", target_arch = "x86")), groestl_force_soft))] {
7+
pub(crate) use soft::*;
8+
} else {
9+
mod avx512_gfni;
3410

35-
#[inline(always)]
36-
fn rndp(mut x: [u64; COLS], r: u64) -> [u64; COLS] {
37-
for i in 0..COLS {
38-
x[i] ^= ((i as u64) << 60) ^ r;
39-
}
40-
[
41-
column(&x, [0, 1, 2, 3, 4, 5, 6, 7]),
42-
column(&x, [1, 2, 3, 4, 5, 6, 7, 0]),
43-
column(&x, [2, 3, 4, 5, 6, 7, 0, 1]),
44-
column(&x, [3, 4, 5, 6, 7, 0, 1, 2]),
45-
column(&x, [4, 5, 6, 7, 0, 1, 2, 3]),
46-
column(&x, [5, 6, 7, 0, 1, 2, 3, 4]),
47-
column(&x, [6, 7, 0, 1, 2, 3, 4, 5]),
48-
column(&x, [7, 0, 1, 2, 3, 4, 5, 6]),
49-
]
50-
}
11+
cpufeatures::new!(cpuid_avx512_gfni, "avx", "avx512f", "avx512vbmi", "gfni");
5112

52-
pub(crate) fn compress(h: &mut [u64; COLS], block: &[u8; 64]) {
53-
let mut q = [0u64; COLS];
54-
for (chunk, v) in block.chunks_exact(8).zip(q.iter_mut()) {
55-
*v = u64::from_be_bytes(chunk.try_into().unwrap());
56-
}
57-
let mut p = [0u64; COLS];
58-
for i in 0..COLS {
59-
p[i] = h[i] ^ q[i];
60-
}
61-
for i in 0..ROUNDS {
62-
q = rndq(q, i);
63-
}
64-
for i in 0..ROUNDS {
65-
p = rndp(p, i << 56);
66-
}
67-
for i in 0..COLS {
68-
h[i] ^= q[i] ^ p[i];
69-
}
70-
}
13+
#[inline(always)]
14+
pub(crate) fn compress(h: &mut [u64; COLS], blocks: &[[u8; 64]]) {
15+
if cpuid_avx512_gfni::get() {
16+
#[allow(unsafe_code)]
17+
unsafe { avx512_gfni::compress(h, blocks); }
18+
} else {
19+
soft::compress(h, blocks);
20+
}
21+
}
7122

72-
pub(crate) fn p(h: &[u64; COLS]) -> [u64; COLS] {
73-
let mut p = *h;
74-
for i in 0..ROUNDS {
75-
p = rndp(p, i << 56);
76-
}
77-
for i in 0..COLS {
78-
p[i] ^= h[i];
23+
#[inline(always)]
24+
pub(crate) fn p(h: &[u64; COLS]) -> [u64; COLS] {
25+
if cpuid_avx512_gfni::get() {
26+
#[allow(unsafe_code)]
27+
unsafe { avx512_gfni::p(h) }
28+
} else {
29+
soft::p(h)
30+
}
31+
}
7932
}
80-
p
8133
}

0 commit comments

Comments
 (0)