@@ -18,6 +18,8 @@ constexpr static inline uint32_t rotl32(uint32_t v, int c) { return (v << c) | (
1818 a += b; d = rotl32(d ^ a, 8 ); \
1919 c += d; b = rotl32(b ^ c, 7 );
2020
// REPEAT10(a): expand the statement sequence `a` ten times back to back.
// Each copy sits in its own braces, and the whole expansion is wrapped in
// do { ... } while (0) so that it behaves as a single statement and takes a
// trailing semicolon at the call site (safe inside unbraced if/else).
//
// The parameter list must follow the macro name with no whitespace in
// between; otherwise the preprocessor defines an object-like macro and the
// "(a)" becomes part of the replacement text. The argument must not contain
// commas outside parentheses, since those would be parsed as argument
// separators.
#define REPEAT10(a) do { {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; } while (0)
22+
// ChaCha constant strings. Each literal is exactly 16 ASCII bytes; the arrays
// also hold the literal's terminating NUL, so sizeof is 17. Per the ChaCha
// specification, "expand 32-byte k" is the constant for 256-bit keys and
// "expand 16-byte k" the one for 128-bit keys (the key-setup code that picks
// between them is outside this excerpt). The text must begin immediately
// after the opening quote: any extra leading byte shifts all four constant
// words and silently changes the cipher output.
static const unsigned char sigma[] = "expand 32-byte k";
static const unsigned char tau[] = "expand 16-byte k";
2325
@@ -119,16 +121,19 @@ void ChaCha20::Keystream(unsigned char* c, size_t bytes)
119121 x13 = j13;
120122 x14 = j14;
121123 x15 = j15;
122- for (i = 20 ;i > 0 ;i -= 2 ) {
123- QUARTERROUND ( x0, x4, x8,x12)
124- QUARTERROUND ( x1, x5, x9,x13)
125- QUARTERROUND ( x2, x6,x10,x14)
126- QUARTERROUND ( x3, x7,x11,x15)
127- QUARTERROUND ( x0, x5,x10,x15)
128- QUARTERROUND ( x1, x6,x11,x12)
129- QUARTERROUND ( x2, x7, x8,x13)
130- QUARTERROUND ( x3, x4, x9,x14)
131- }
124+
125+ // The 20 inner ChaCha20 rounds are unrolled here for performance.
126+ REPEAT10 (
127+ QUARTERROUND ( x0, x4, x8,x12);
128+ QUARTERROUND ( x1, x5, x9,x13);
129+ QUARTERROUND ( x2, x6,x10,x14);
130+ QUARTERROUND ( x3, x7,x11,x15);
131+ QUARTERROUND ( x0, x5,x10,x15);
132+ QUARTERROUND ( x1, x6,x11,x12);
133+ QUARTERROUND ( x2, x7, x8,x13);
134+ QUARTERROUND ( x3, x4, x9,x14);
135+ );
136+
132137 x0 += j0;
133138 x1 += j1;
134139 x2 += j2;
@@ -231,16 +236,19 @@ void ChaCha20::Crypt(const unsigned char* m, unsigned char* c, size_t bytes)
231236 x13 = j13;
232237 x14 = j14;
233238 x15 = j15;
234- for (i = 20 ;i > 0 ;i -= 2 ) {
235- QUARTERROUND ( x0, x4, x8,x12)
236- QUARTERROUND ( x1, x5, x9,x13)
237- QUARTERROUND ( x2, x6,x10,x14)
238- QUARTERROUND ( x3, x7,x11,x15)
239- QUARTERROUND ( x0, x5,x10,x15)
240- QUARTERROUND ( x1, x6,x11,x12)
241- QUARTERROUND ( x2, x7, x8,x13)
242- QUARTERROUND ( x3, x4, x9,x14)
243- }
239+
240+ // The 20 inner ChaCha20 rounds are unrolled here for performance.
241+ REPEAT10 (
242+ QUARTERROUND ( x0, x4, x8,x12);
243+ QUARTERROUND ( x1, x5, x9,x13);
244+ QUARTERROUND ( x2, x6,x10,x14);
245+ QUARTERROUND ( x3, x7,x11,x15);
246+ QUARTERROUND ( x0, x5,x10,x15);
247+ QUARTERROUND ( x1, x6,x11,x12);
248+ QUARTERROUND ( x2, x7, x8,x13);
249+ QUARTERROUND ( x3, x4, x9,x14);
250+ );
251+
244252 x0 += j0;
245253 x1 += j1;
246254 x2 += j2;
0 commit comments