Skip to content

Commit ab654c7

Browse files
committed
Unroll Keccak-f implementation
1 parent 3f01ddb commit ab654c7

File tree

1 file changed

+66
-33
lines changed

1 file changed

+66
-33
lines changed

src/crypto/sha3.cpp

Lines changed: 66 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -30,48 +30,81 @@ void KeccakF(uint64_t (&st)[25])
3030
0x8000000000008002, 0x8000000000000080, 0x000000000000800a, 0x800000008000000a,
3131
0x8000000080008081, 0x8000000000008080, 0x0000000080000001, 0x8000000080008008
3232
};
33-
static constexpr int ROTC[24] = {
34-
1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
35-
27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44
36-
};
37-
static constexpr int PILN[24] = {
38-
10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
39-
15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1
40-
};
4133
static constexpr int ROUNDS = 24;
4234

4335
for (int round = 0; round < ROUNDS; ++round) {
44-
uint64_t bc[5], t;
36+
uint64_t bc0, bc1, bc2, bc3, bc4, t;
4537

4638
// Theta
47-
for (int i = 0; i < 5; i++) {
48-
bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20];
49-
}
50-
51-
for (int i = 0; i < 5; i++) {
52-
t = bc[(i + 4) % 5] ^ Rotl(bc[(i + 1) % 5], 1);
53-
for (int j = 0; j < 25; j += 5) st[j + i] ^= t;
54-
}
39+
bc0 = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
40+
bc1 = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
41+
bc2 = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
42+
bc3 = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
43+
bc4 = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
44+
t = bc4 ^ Rotl(bc1, 1); st[0] ^= t; st[5] ^= t; st[10] ^= t; st[15] ^= t; st[20] ^= t;
45+
t = bc0 ^ Rotl(bc2, 1); st[1] ^= t; st[6] ^= t; st[11] ^= t; st[16] ^= t; st[21] ^= t;
46+
t = bc1 ^ Rotl(bc3, 1); st[2] ^= t; st[7] ^= t; st[12] ^= t; st[17] ^= t; st[22] ^= t;
47+
t = bc2 ^ Rotl(bc4, 1); st[3] ^= t; st[8] ^= t; st[13] ^= t; st[18] ^= t; st[23] ^= t;
48+
t = bc3 ^ Rotl(bc0, 1); st[4] ^= t; st[9] ^= t; st[14] ^= t; st[19] ^= t; st[24] ^= t;
5549

5650
// Rho Pi
5751
t = st[1];
58-
for (int i = 0; i < 24; i++) {
59-
int j = PILN[i];
60-
bc[0] = st[j];
61-
st[j] = Rotl(t, ROTC[i]);
62-
t = bc[0];
63-
}
64-
65-
// Chi
66-
for (int j = 0; j < 25; j += 5) {
67-
for (int i = 0; i < 5; i++) bc[i] = st[j + i];
68-
for (int i = 0; i < 5; i++) {
69-
st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5];
70-
}
71-
}
52+
bc0 = st[10]; st[10] = Rotl(t, 1); t = bc0;
53+
bc0 = st[7]; st[7] = Rotl(t, 3); t = bc0;
54+
bc0 = st[11]; st[11] = Rotl(t, 6); t = bc0;
55+
bc0 = st[17]; st[17] = Rotl(t, 10); t = bc0;
56+
bc0 = st[18]; st[18] = Rotl(t, 15); t = bc0;
57+
bc0 = st[3]; st[3] = Rotl(t, 21); t = bc0;
58+
bc0 = st[5]; st[5] = Rotl(t, 28); t = bc0;
59+
bc0 = st[16]; st[16] = Rotl(t, 36); t = bc0;
60+
bc0 = st[8]; st[8] = Rotl(t, 45); t = bc0;
61+
bc0 = st[21]; st[21] = Rotl(t, 55); t = bc0;
62+
bc0 = st[24]; st[24] = Rotl(t, 2); t = bc0;
63+
bc0 = st[4]; st[4] = Rotl(t, 14); t = bc0;
64+
bc0 = st[15]; st[15] = Rotl(t, 27); t = bc0;
65+
bc0 = st[23]; st[23] = Rotl(t, 41); t = bc0;
66+
bc0 = st[19]; st[19] = Rotl(t, 56); t = bc0;
67+
bc0 = st[13]; st[13] = Rotl(t, 8); t = bc0;
68+
bc0 = st[12]; st[12] = Rotl(t, 25); t = bc0;
69+
bc0 = st[2]; st[2] = Rotl(t, 43); t = bc0;
70+
bc0 = st[20]; st[20] = Rotl(t, 62); t = bc0;
71+
bc0 = st[14]; st[14] = Rotl(t, 18); t = bc0;
72+
bc0 = st[22]; st[22] = Rotl(t, 39); t = bc0;
73+
bc0 = st[9]; st[9] = Rotl(t, 61); t = bc0;
74+
bc0 = st[6]; st[6] = Rotl(t, 20); t = bc0;
75+
st[1] = Rotl(t, 44);
7276

73-
// Iota
74-
st[0] ^= RNDC[round];
77+
// Chi Iota
78+
bc0 = st[0]; bc1 = st[1]; bc2 = st[2]; bc3 = st[3]; bc4 = st[4];
79+
st[0] = bc0 ^ (~bc1 & bc2) ^ RNDC[round];
80+
st[1] = bc1 ^ (~bc2 & bc3);
81+
st[2] = bc2 ^ (~bc3 & bc4);
82+
st[3] = bc3 ^ (~bc4 & bc0);
83+
st[4] = bc4 ^ (~bc0 & bc1);
84+
bc0 = st[5]; bc1 = st[6]; bc2 = st[7]; bc3 = st[8]; bc4 = st[9];
85+
st[5] = bc0 ^ (~bc1 & bc2);
86+
st[6] = bc1 ^ (~bc2 & bc3);
87+
st[7] = bc2 ^ (~bc3 & bc4);
88+
st[8] = bc3 ^ (~bc4 & bc0);
89+
st[9] = bc4 ^ (~bc0 & bc1);
90+
bc0 = st[10]; bc1 = st[11]; bc2 = st[12]; bc3 = st[13]; bc4 = st[14];
91+
st[10] = bc0 ^ (~bc1 & bc2);
92+
st[11] = bc1 ^ (~bc2 & bc3);
93+
st[12] = bc2 ^ (~bc3 & bc4);
94+
st[13] = bc3 ^ (~bc4 & bc0);
95+
st[14] = bc4 ^ (~bc0 & bc1);
96+
bc0 = st[15]; bc1 = st[16]; bc2 = st[17]; bc3 = st[18]; bc4 = st[19];
97+
st[15] = bc0 ^ (~bc1 & bc2);
98+
st[16] = bc1 ^ (~bc2 & bc3);
99+
st[17] = bc2 ^ (~bc3 & bc4);
100+
st[18] = bc3 ^ (~bc4 & bc0);
101+
st[19] = bc4 ^ (~bc0 & bc1);
102+
bc0 = st[20]; bc1 = st[21]; bc2 = st[22]; bc3 = st[23]; bc4 = st[24];
103+
st[20] = bc0 ^ (~bc1 & bc2);
104+
st[21] = bc1 ^ (~bc2 & bc3);
105+
st[22] = bc2 ^ (~bc3 & bc4);
106+
st[23] = bc3 ^ (~bc4 & bc0);
107+
st[24] = bc4 ^ (~bc0 & bc1);
75108
}
76109
}
77110

0 commit comments

Comments
 (0)