@@ -53,6 +53,11 @@ static const uint8_t CONST1[16] = {
5353 state[(i_1 + offset) % STATE] = SIMD_XOR(state[(i_1 + offset) % STATE], M);\
5454 state[(i_2 + offset) % STATE] = SIMD_XOR(state[(i_2 + offset) % STATE], M);
5555
// KEYSTREAM_BLOCK(M, offset): macro added by this change. It overwrites M in place:
//   1. tmp[offset] = state[(P_0 + offset) % STATE] XOR state[(P_1 + offset) % STATE]
//   2. M = AESENC(tmp[offset], M)
//   3. M = M XOR state[(P_7 + offset) % STATE]
// Steps 1-2 are the same expressions that the ENC_offset definition right after
// this macro applies to C. The macro reads `state` and writes `tmp[offset]`, both
// taken from the expansion site; it does not assign to any state[] element.
// NOTE(review): expands to three bare statements (no do { } while (0) wrapper),
// so it must not be used as the unbraced body of an if/else/loop.
56+ #define KEYSTREAM_BLOCK (M , offset )\
57+ tmp[offset] = SIMD_XOR(state[(P_0 + offset) % STATE], state[(P_1 + offset) % STATE]);\
58+ M = AESENC(tmp[offset], M);\
59+ M = SIMD_XOR(M, state[(P_7 + offset) % STATE]);
60+
5661#define ENC_offset (M , C , offset )\
5762 C = SIMD_XOR(state[(P_0 + offset) % STATE], state[(P_1 + offset) % STATE]);\
5863 C = AESENC(C, M);\
@@ -79,9 +84,14 @@ static const uint8_t CONST1[16] = {
7984 state[(i_1 + offset) % STATE] = SIMD_XOR(state[(i_1 + offset) % STATE], M);\
8085 state[(i_2 + offset) % STATE] = SIMD_XOR(state[(i_2 + offset) % STATE], M);
8186
// This hunk turns the former ENC_offset header (the `82-` line) into
// KEYSTREAM_BLOCK(M, offset) and keeps the unchanged AESEMC line as its first step:
//   1. tmp[offset] = AESEMC(state[(P_0 + offset) % STATE], state[(P_1 + offset) % STATE])
//   2. M = tmp[offset] XOR M        (the removed `84-` line stored this in C instead)
//   3. M = M XOR state[(P_7 + offset) % STATE]
// M is overwritten in place; `tmp` and `state` come from the expansion site and
// no state[] element is assigned.
// NOTE(review): a KEYSTREAM_BLOCK built on AESENC is defined earlier in this diff;
// presumably the two definitions sit in different preprocessor branches — the
// enclosing #if/#else is not visible here, confirm.
82- #define ENC_offset ( M , C , offset )\
87+ #define KEYSTREAM_BLOCK ( M , offset )\
8388 tmp[offset] = AESEMC(state[(P_0 + offset) % STATE], state[(P_1 + offset) % STATE]);\
84- C = SIMD_XOR(tmp[offset], M); \
89+ M = SIMD_XOR(tmp[offset], M);\
90+ M = SIMD_XOR(M, state[(P_7 + offset) % STATE]);
91+
92+ #define ENC_offset (M , C , offset )\
93+ C = AESEMC(state[(P_0 + offset) % STATE], state[(P_1 + offset) % STATE]);\
94+ C = SIMD_XOR(C, M); \
8595 state[(0 + offset) % STATE] = SIMD_XOR(C, AESL(state[(P_4 + offset) % STATE]));\
8696 C = SIMD_XOR(C, state[(P_7 + offset) % STATE]); \
8797 state[(i_1 + offset) % STATE] = SIMD_XOR(state[(i_1 + offset) % STATE], M);\
@@ -344,11 +354,22 @@ void HiAE_stream_proc_ad(DATA128b* state, const uint8_t *ad, size_t len) {
344354 for (; i < prefix ; i += UNROLL_BLOCK_SIZE ) {
345355 AD_UPDATE ;
346356 }
357+
// Split the associated data into whole BLOCK_SIZE blocks plus a `pad`-byte tail.
358+ size_t pad = len % BLOCK_SIZE ;
359+ len -= pad ;
// len is now a multiple of BLOCK_SIZE; `i` continues from the unrolled loop above
// and the remaining whole blocks are absorbed one at a time.
347360 for (; i < len ; i += BLOCK_SIZE ) {
348361 M [0 ] = SIMD_LOAD (ad + i );
349362 UPDATE_STATE_offset (M [0 ], 0 );
350363 STATE_SHIFT ;
351364 }
// Partial final block: copy the tail into a zero-filled BLOCK_SIZE buffer and
// absorb it with the same UPDATE_STATE_offset / STATE_SHIFT sequence as a full block.
365+ if (pad != 0 ) {
366+ uint8_t buf [BLOCK_SIZE ];
367+ memset (buf , 0x00 , sizeof (buf ));
// ad + len is the first byte of the tail, because len was rounded down above.
368+ memcpy (buf , ad + len , pad );
369+ M [0 ] = SIMD_LOAD (buf );
370+ UPDATE_STATE_offset (M [0 ], 0 );
371+ STATE_SHIFT ;
372+ }
352373}
353374
354375void HiAE_stream_finalize (DATA128b * state , uint64_t ad_len , uint64_t plain_len , uint8_t * tag ) {
@@ -371,7 +392,7 @@ void HiAE_stream_encrypt(DATA128b* state, uint8_t *dst, const uint8_t *src, size
371392 size_t prefix = size - rest ;
372393 if (size == 0 )
373394 return ;
374- DATA128b M [STATE ], C [STATE ], tmp [STATE ], temp ;
395+ DATA128b M [STATE ], C [STATE ], tmp [1 ] ;
375396 #if defined(__VAES__ ) && defined(__x86_64__ ) && defined(__AVX512F__ )
376397 // asm code optimized for VAES support devices
377398
@@ -606,20 +627,32 @@ void HiAE_stream_encrypt(DATA128b* state, uint8_t *dst, const uint8_t *src, size
606627 }
607628 #endif
608629
// Split the `rest` bytes that start at offset `prefix` into whole BLOCK_SIZE
// blocks plus a `pad`-byte tail.
630+ size_t pad = rest % BLOCK_SIZE ;
631+ rest -= pad ;
// Whole blocks: load plaintext from src, run ENC_offset, store ciphertext to dst.
609632 for (size_t i = 0 ; i < rest ; i += BLOCK_SIZE ) {
610633 M [0 ] = SIMD_LOAD (src + i + prefix );
611634 ENC_offset (M [0 ], C [0 ], 0 );
612635 STATE_SHIFT ;
613636 SIMD_STORE (dst + i + prefix , C [0 ]);
614637 }
// Partial final block: the tail is zero-padded to BLOCK_SIZE in a stack buffer,
// encrypted like a full block, and only the first `pad` ciphertext bytes are
// copied to dst; the remaining BLOCK_SIZE - pad output bytes are discarded.
638+ if (pad != 0 ) {
639+ uint8_t buf [BLOCK_SIZE ];
// src + rest + prefix is the first tail byte (rest was rounded down above).
640+ memcpy (buf , src + rest + prefix , pad );
641+ memset (buf + pad , 0 , BLOCK_SIZE - pad );
642+ M [0 ] = SIMD_LOAD (buf );
643+ ENC_offset (M [0 ], C [0 ], 0 );
644+ STATE_SHIFT ;
// buf is reused as the ciphertext staging area so dst is never written past `pad` bytes.
645+ SIMD_STORE (buf , C [0 ]);
646+ memcpy (dst + rest + prefix , buf , pad );
647+ }
615648}
616649
617650void HiAE_stream_decrypt (DATA128b * state , uint8_t * dst , const uint8_t * src , size_t size ) {
618651 size_t rest = size % UNROLL_BLOCK_SIZE ;
619652 size_t prefix = size - rest ;
620653 if (size == 0 )
621654 return ;
622- DATA128b M [STATE ], C [STATE ], tmp [STATE ], temp ;
655+ DATA128b M [STATE ], C [STATE ], tmp [STATE ];
623656
624657 #if defined(__VAES__ ) && defined(__x86_64__ ) && defined(__AVX512F__ )
625658 // asm code optimized for VAES support devices
@@ -857,20 +890,33 @@ void HiAE_stream_decrypt(DATA128b* state, uint8_t *dst, const uint8_t *src, size
857890
858891 #endif
859892
// Split the `rest` bytes that start at offset `prefix` into whole BLOCK_SIZE
// blocks plus a `pad`-byte tail.
893+ size_t pad = rest % BLOCK_SIZE ;
894+ rest -= pad ;
895+
// Whole blocks: load ciphertext from src, run DEC_offset, store plaintext to dst.
860896 for (size_t i = 0 ; i < rest ; i += BLOCK_SIZE ) {
861897 C [0 ] = SIMD_LOAD (src + i + prefix );
862898 DEC_offset (M [0 ], C [0 ], 0 );
863899 STATE_SHIFT ;
864900 SIMD_STORE (dst + i + prefix , M [0 ]);
865901 }
// Partial final block. Note the roles of the two registers in this branch:
// C[0] starts as the ciphertext tail and ends as the recovered plaintext,
// while M[0] holds a byte mask (0xff for the first `pad` bytes, 0x00 after).
902+ if (pad != 0 ) {
903+ uint8_t buf [BLOCK_SIZE ];
904+ uint8_t mask [BLOCK_SIZE ];
// NOTE(review): only buf[0..pad) is written here; buf[pad..BLOCK_SIZE) is still
// uninitialised when SIMD_LOAD(buf) reads it (assuming SIMD_LOAD reads a full
// block — its definition is not visible). The encrypt path zero-fills the same
// region with memset. The mask applied below clears those lanes again, provided
// KEYSTREAM_BLOCK only XORs its M argument lane-wise — confirm for the AESENC variant.
905+ memcpy (buf , src + rest + prefix , pad );
906+ memset (mask , 0xff , pad );
907+ memset (mask + pad , 0x00 , BLOCK_SIZE - pad );
908+ C [0 ] = SIMD_LOAD (buf );
909+ M [0 ] = SIMD_LOAD (mask );
// KEYSTREAM_BLOCK overwrites C[0] in place and does not assign to state[].
910+ KEYSTREAM_BLOCK (C [0 ], 0 );
// Zero everything past the `pad` real bytes before the value is fed into the state update.
// NOTE(review): `&=` on DATA128b assumes the type supports the & operator
// (e.g. a GCC/Clang vector type) — TODO confirm for other compilers/targets.
911+ C [0 ] &= M [0 ];
912+ UPDATE_STATE_offset (C [0 ], 0 );
913+ STATE_SHIFT ;
// buf is reused for the output; only the first `pad` bytes are copied to dst.
914+ SIMD_STORE (buf , C [0 ]);
915+ memcpy (dst + rest + prefix , buf , pad );
916+ }
866917}
867918
868- int HiAE_AEAD_encrypt (uint8_t * key , uint8_t * iv , uint8_t * plain , uint8_t * cipher , size_t msg_len , uint8_t * ad , size_t ad_len , uint8_t * tag ) {
869- if (ad_len % BLOCK_SIZE != 0 || msg_len % BLOCK_SIZE != 0 )
870- {
871- return 1 ;
872- }
873-
919+ int HiAE_AEAD_encrypt (uint8_t * key , uint8_t * iv , uint8_t * plain , uint8_t * cipher , size_t msg_len , uint8_t * ad , size_t ad_len , uint8_t * tag ) {
874920 DATA128b state [STATE ];
875921 HiAE_stream_init (state , key , iv );
876922 HiAE_stream_proc_ad (state , ad , ad_len );
@@ -881,10 +927,6 @@ int HiAE_AEAD_encrypt(uint8_t* key, uint8_t* iv, uint8_t* plain, uint8_t* cipher
881927}
882928
883929int HiAE_AEAD_decrypt (uint8_t * key , uint8_t * iv , uint8_t * plain , uint8_t * cipher , size_t msg_len , uint8_t * ad , size_t ad_len , uint8_t * tag ) {
884- if (ad_len % BLOCK_SIZE != 0 || msg_len % BLOCK_SIZE != 0 )
885- {
886- return 1 ;
887- }
888930
889931 DATA128b state [STATE ];
890932 HiAE_stream_init (state , key , iv );
@@ -896,10 +938,6 @@ int HiAE_AEAD_decrypt(uint8_t* key, uint8_t* iv, uint8_t* plain, uint8_t* cipher
896938}
897939
898940int HiAE_encrypt (uint8_t * key , uint8_t * iv , uint8_t * plain , uint8_t * cipher , size_t msg_len ) {
899- if (msg_len % BLOCK_SIZE != 0 )
900- {
901- return 1 ;
902- }
903941
904942 DATA128b state [STATE ];
905943 HiAE_stream_init (state , key , iv );
@@ -909,10 +947,6 @@ int HiAE_encrypt(uint8_t* key, uint8_t* iv, uint8_t* plain, uint8_t* cipher, siz
909947}
910948
911949int HiAE_decrypt (uint8_t * key , uint8_t * iv , uint8_t * plain , uint8_t * cipher , size_t msg_len ) {
912- if (msg_len % BLOCK_SIZE != 0 )
913- {
914- return 1 ;
915- }
916950
917951 DATA128b state [STATE ];
918952 HiAE_stream_init (state , key , iv );
@@ -922,10 +956,6 @@ int HiAE_decrypt(uint8_t* key, uint8_t* iv, uint8_t* plain, uint8_t* cipher, siz
922956}
923957
924958int HiAE_verification (uint8_t * key , uint8_t * iv , uint8_t * ad , size_t ad_len , uint8_t * tag ) {
925- if (ad_len % BLOCK_SIZE != 0 )
926- {
927- return 1 ;
928- }
929959
930960 DATA128b state [STATE ];
931961 HiAE_stream_init (state , key , iv );
0 commit comments