Skip to content

Commit e619723

Browse files
ebiggers authored and herbertx committed
crypto: x86/aes-xts - eliminate a few more instructions
- For conditionally subtracting 16 from LEN when decrypting a message whose length isn't a multiple of 16, use the cmovnz instruction.
- Fold the addition of 4*VL to LEN into the sub of VL or 16 from LEN.
- Remove an unnecessary test instruction.

This results in slightly shorter code, both source and binary.

Signed-off-by: Eric Biggers <[email protected]>
Signed-off-by: Herbert Xu <[email protected]>
1 parent 2717e01 commit e619723

File tree

1 file changed

+13
-26
lines changed

1 file changed

+13
-26
lines changed

arch/x86/crypto/aes-xts-avx-x86_64.S

Lines changed: 13 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -559,20 +559,20 @@
559559
.macro _aes_xts_crypt enc
560560
_define_aliases
561561

562-
// Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256).
563-
movl 480(KEY), KEYLEN
564-
565562
.if !\enc
566563
// When decrypting a message whose length isn't a multiple of the AES
567564
// block length, exclude the last full block from the main loop by
568565
// subtracting 16 from LEN. This is needed because ciphertext stealing
569566
// decryption uses the last two tweaks in reverse order. We'll handle
570567
// the last full block and the partial block specially at the end.
568+
lea -16(LEN), %rax
571569
test $15, LEN
572-
jnz .Lneed_cts_dec\@
573-
.Lxts_init\@:
570+
cmovnz %rax, LEN
574571
.endif
575572

573+
// Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256).
574+
movl 480(KEY), KEYLEN
575+
576576
// Setup the pointer to the round keys and cache as many as possible.
577577
_setup_round_keys \enc
578578

@@ -661,11 +661,10 @@
661661
RET
662662

663663
.Lhandle_remainder\@:
664-
add $4*VL, LEN // Undo the extra sub from earlier.
665664

666665
// En/decrypt any remaining full blocks, one vector at a time.
667666
.if VL > 16
668-
sub $VL, LEN
667+
add $3*VL, LEN // Undo extra sub of 4*VL, then sub VL.
669668
jl .Lvec_at_a_time_done\@
670669
.Lvec_at_a_time\@:
671670
_vmovdqu (SRC), V0
@@ -677,9 +676,9 @@
677676
sub $VL, LEN
678677
jge .Lvec_at_a_time\@
679678
.Lvec_at_a_time_done\@:
680-
add $VL-16, LEN // Undo the extra sub from earlier.
679+
add $VL-16, LEN // Undo extra sub of VL, then sub 16.
681680
.else
682-
sub $16, LEN
681+
add $4*VL-16, LEN // Undo extra sub of 4*VL, then sub 16.
683682
.endif
684683

685684
// En/decrypt any remaining full blocks, one at a time.
@@ -694,24 +693,12 @@
694693
sub $16, LEN
695694
jge .Lblock_at_a_time\@
696695
.Lblock_at_a_time_done\@:
697-
add $16, LEN // Undo the extra sub from earlier.
698-
699-
.Lfull_blocks_done\@:
700-
// Now 0 <= LEN <= 15. If LEN is nonzero, do ciphertext stealing to
701-
// process the last 16 + LEN bytes. If LEN is zero, we're done.
702-
test LEN, LEN
703-
jnz .Lcts\@
704-
jmp .Ldone\@
705-
706-
.if !\enc
707-
.Lneed_cts_dec\@:
708-
sub $16, LEN
709-
jmp .Lxts_init\@
710-
.endif
696+
add $16, LEN // Undo the extra sub of 16.
697+
// Now 0 <= LEN <= 15. If LEN is zero, we're done.
698+
jz .Ldone\@
711699

712-
.Lcts\@:
713-
// Do ciphertext stealing (CTS) to en/decrypt the last full block and
714-
// the partial block. TWEAK0_XMM contains the next tweak.
700+
// Otherwise 1 <= LEN <= 15, but the real remaining length is 16 + LEN.
701+
// Do ciphertext stealing to process the last 16 + LEN bytes.
715702

716703
.if \enc
717704
// If encrypting, the main loop already encrypted the last full block to

0 commit comments

Comments
 (0)