@@ -18,6 +18,8 @@ constexpr static inline uint32_t rotl32(uint32_t v, int c) { return (v << c) | (
18
18
a += b; d = rotl32(d ^ a, 8 ); \
19
19
c += d; b = rotl32(b ^ c, 7 );
20
20
21
// REPEAT10(a): expands the statement sequence `a` ten times back to back.
// Each copy sits in its own brace scope, and the whole expansion is wrapped in
// do { ... } while (0) so an invocation followed by ';' acts as one statement
// (safe inside an unbraced if/else).
//
// The opening parenthesis must directly follow the macro name. With whitespace
// in between, the preprocessor defines an object-like macro whose replacement
// text begins with "(a )", and every REPEAT10(...) use fails to compile.
//
// `a` must not contain commas outside of parentheses, since those would split
// the single macro argument.
#define REPEAT10(a) do { {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; } while (0)
22
+
21
23
// ChaCha20 constant for 256-bit keys: exactly the 16 ASCII bytes
// "expand 32-byte k" (plus the implicit NUL terminator of the literal).
// The string must start at 'e' with no padding; any leading byte shifts the
// constant words and yields a non-standard cipher.
// NOTE(review): presumably consumed 4 bytes at a time as little-endian words by
// the key setup code, which is not visible in this view; confirm.
static const unsigned char sigma[] = "expand 32-byte k";
22
24
// ChaCha20 constant for 128-bit keys: exactly the 16 ASCII bytes
// "expand 16-byte k" (plus the implicit NUL terminator of the literal).
// The string must start at 'e' with no padding; any leading byte shifts the
// constant words and yields a non-standard cipher.
// NOTE(review): presumably selected by the key setup code when a 16-byte key
// is supplied; that code is not visible in this view, so confirm.
static const unsigned char tau[] = "expand 16-byte k";
23
25
@@ -119,16 +121,19 @@ void ChaCha20::Keystream(unsigned char* c, size_t bytes)
119
121
x13 = j13;
120
122
x14 = j14;
121
123
x15 = j15;
122
- for (i = 20 ;i > 0 ;i -= 2 ) {
123
- QUARTERROUND ( x0, x4, x8,x12)
124
- QUARTERROUND ( x1, x5, x9,x13)
125
- QUARTERROUND ( x2, x6,x10,x14)
126
- QUARTERROUND ( x3, x7,x11,x15)
127
- QUARTERROUND ( x0, x5,x10,x15)
128
- QUARTERROUND ( x1, x6,x11,x12)
129
- QUARTERROUND ( x2, x7, x8,x13)
130
- QUARTERROUND ( x3, x4, x9,x14)
131
- }
124
+
125
+ // The 20 inner ChaCha20 rounds are unrolled here for performance.
126
+ REPEAT10 (
127
+ QUARTERROUND ( x0, x4, x8,x12);
128
+ QUARTERROUND ( x1, x5, x9,x13);
129
+ QUARTERROUND ( x2, x6,x10,x14);
130
+ QUARTERROUND ( x3, x7,x11,x15);
131
+ QUARTERROUND ( x0, x5,x10,x15);
132
+ QUARTERROUND ( x1, x6,x11,x12);
133
+ QUARTERROUND ( x2, x7, x8,x13);
134
+ QUARTERROUND ( x3, x4, x9,x14);
135
+ );
136
+
132
137
x0 += j0;
133
138
x1 += j1;
134
139
x2 += j2;
@@ -231,16 +236,19 @@ void ChaCha20::Crypt(const unsigned char* m, unsigned char* c, size_t bytes)
231
236
x13 = j13;
232
237
x14 = j14;
233
238
x15 = j15;
234
- for (i = 20 ;i > 0 ;i -= 2 ) {
235
- QUARTERROUND ( x0, x4, x8,x12)
236
- QUARTERROUND ( x1, x5, x9,x13)
237
- QUARTERROUND ( x2, x6,x10,x14)
238
- QUARTERROUND ( x3, x7,x11,x15)
239
- QUARTERROUND ( x0, x5,x10,x15)
240
- QUARTERROUND ( x1, x6,x11,x12)
241
- QUARTERROUND ( x2, x7, x8,x13)
242
- QUARTERROUND ( x3, x4, x9,x14)
243
- }
239
+
240
+ // The 20 inner ChaCha20 rounds are unrolled here for performance.
241
+ REPEAT10 (
242
+ QUARTERROUND ( x0, x4, x8,x12);
243
+ QUARTERROUND ( x1, x5, x9,x13);
244
+ QUARTERROUND ( x2, x6,x10,x14);
245
+ QUARTERROUND ( x3, x7,x11,x15);
246
+ QUARTERROUND ( x0, x5,x10,x15);
247
+ QUARTERROUND ( x1, x6,x11,x12);
248
+ QUARTERROUND ( x2, x7, x8,x13);
249
+ QUARTERROUND ( x3, x4, x9,x14);
250
+ );
251
+
244
252
x0 += j0;
245
253
x1 += j1;
246
254
x2 += j2;
0 commit comments