@@ -93,7 +93,7 @@ static inline void PartialStore(const __m512i val, uint8_t *Dest, uint64_t Size)
9393 lc_memset_secure (BuffForPartialOp , 0 , sizeof (BuffForPartialOp ));
9494}
9595
96- #define DISABLE_16_BLOCKS
96+ #undef DISABLE_16_BLOCKS
9797int cc20_crypt_bytes_avx512 (uint32_t * state , const uint8_t * in , uint8_t * out ,
9898 uint64_t len )
9999{
@@ -159,7 +159,7 @@ int cc20_crypt_bytes_avx512(uint32_t *state, const uint8_t *in, uint8_t *out,
159159 ws -> T3 = _mm512_set_epi64 (0 , 15 , 0 , 14 , 0 , 13 , 0 , 12 );
160160
161161 ws -> state3_2 = _mm512_add_epi32 (ws -> T1 , ws -> T2 );
162- ws -> ws -> state3_3 = _mm512_add_epi32 (ws -> T1 , ws -> T3 );
162+ ws -> state3_3 = _mm512_add_epi32 (ws -> T1 , ws -> T3 );
163163
164164 ws -> ctr_increment =
165165 _mm512_set_epi64 (0 , 16 , 0 , 16 , 0 , 16 , 0 , 16 );
@@ -311,7 +311,7 @@ int cc20_crypt_bytes_avx512(uint32_t *state, const uint8_t *in, uint8_t *out,
311311
312312 ws -> X0_3 = _mm512_xor_si512 (ws -> X0_3 , ws -> X0_0 );
313313 ws -> X1_3 = _mm512_xor_si512 (ws -> X1_3 , ws -> X1_0 );
314- ws -> X2_3 = _mm512_xor_si512 (ws -> X2_3 , ws -> X3_0 );
314+ ws -> X2_3 = _mm512_xor_si512 (ws -> X2_3 , ws -> X2_0 );
315315 ws -> X3_3 = _mm512_xor_si512 (ws -> X3_3 , ws -> X3_0 );
316316
317317 ws -> X0_3 = _mm512_rol_epi32 (ws -> X0_3 , 8 );
@@ -321,8 +321,8 @@ int cc20_crypt_bytes_avx512(uint32_t *state, const uint8_t *in, uint8_t *out,
321321
322322 ws -> X0_2 = _mm512_add_epi32 (ws -> X0_2 , ws -> X0_3 );
323323 ws -> X1_2 = _mm512_add_epi32 (ws -> X1_2 , ws -> X1_3 );
324- ws -> X2_2 = _mm512_add_epi32 (ws -> X2_2 , ws -> X3_3 );
325- ws -> X3_2 = _mm512_add_epi32 (ws -> X2_2 , ws -> X3_3 );
324+ ws -> X2_2 = _mm512_add_epi32 (ws -> X2_2 , ws -> X2_3 );
325+ ws -> X3_2 = _mm512_add_epi32 (ws -> X3_2 , ws -> X3_3 );
326326
327327 ws -> X0_1 = _mm512_xor_si512 (ws -> X0_1 , ws -> X0_2 );
328328 ws -> X1_1 = _mm512_xor_si512 (ws -> X1_1 , ws -> X1_2 );
@@ -543,7 +543,7 @@ int cc20_crypt_bytes_avx512(uint32_t *state, const uint8_t *in, uint8_t *out,
543543 (const __m512i * )(CurrentIn + 13 * 64 ));
544544 ws -> T3 = _mm512_loadu_si512 (
545545 (const __m512i * )(CurrentIn + 14 * 64 ));
546- T4 = _mm512_loadu_si512 (
546+ ws -> T4 = _mm512_loadu_si512 (
547547 (const __m512i * )(CurrentIn + 15 * 64 ));
548548#pragma GCC diagnostic pop
549549
0 commit comments