@@ -559,20 +559,20 @@
 .macro	_aes_xts_crypt	enc
 	_define_aliases
 
-	// Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256).
-	movl	480(KEY), KEYLEN
-
 .if !\enc
 	// When decrypting a message whose length isn't a multiple of the AES
 	// block length, exclude the last full block from the main loop by
 	// subtracting 16 from LEN.  This is needed because ciphertext stealing
 	// decryption uses the last two tweaks in reverse order.  We'll handle
 	// the last full block and the partial block specially at the end.
+	lea	-16(LEN), %rax
 	test	$15, LEN
-	jnz	.Lneed_cts_dec\@
-.Lxts_init\@:
+	cmovnz	%rax, LEN
 .endif
 
+	// Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256).
+	movl	480(KEY), KEYLEN
+
 	// Setup the pointer to the round keys and cache as many as possible.
 	_setup_round_keys	\enc
 
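This first hunk replaces a conditional branch with a branchless select: `lea` computes LEN - 16 unconditionally, `test` checks the low four bits, and `cmovnz` commits the subtraction only when the length isn't block-aligned, so the `.Lneed_cts_dec\@`/`.Lxts_init\@` detour disappears. The key-length load simply moves below the adjustment. The C sketch below models the same logic (the function name is hypothetical; a compiler may well emit the very same lea/test/cmov sequence for the ternary):

#include <stdint.h>
#include <stdio.h>

/*
 * Minimal sketch of the branchless length adjustment, assuming len holds
 * the message length in bytes.  Each line is annotated with the asm it
 * mirrors: compute len - 16 up front, then keep it only when the low 4
 * bits of len are nonzero.
 */
static uint64_t exclude_last_full_block(uint64_t len)
{
	uint64_t adjusted = len - 16;	/* lea    -16(LEN), %rax */
	return (len & 15) ? adjusted	/* test   $15, LEN       */
			  : len;	/* cmovnz %rax, LEN      */
}

int main(void)
{
	printf("%llu\n", (unsigned long long)exclude_last_full_block(64)); /* 64 */
	printf("%llu\n", (unsigned long long)exclude_last_full_block(70)); /* 54 */
	return 0;
}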
@@ -661,11 +661,10 @@
 	RET
 
 .Lhandle_remainder\@:
-	add	$4*VL, LEN	// Undo the extra sub from earlier.
 
 	// En/decrypt any remaining full blocks, one vector at a time.
 .if VL > 16
-	sub	$VL, LEN
+	add	$3*VL, LEN	// Undo extra sub of 4*VL, then sub VL.
 	jl	.Lvec_at_a_time_done\@
 .Lvec_at_a_time\@:
 	_vmovdqu	(SRC), V0
@@ -677,9 +676,9 @@
 	sub	$VL, LEN
 	jge	.Lvec_at_a_time\@
 .Lvec_at_a_time_done\@:
-	add	$VL-16, LEN	// Undo the extra sub from earlier.
+	add	$VL-16, LEN	// Undo extra sub of VL, then sub 16.
 .else
-	sub	$16, LEN
+	add	$4*VL-16, LEN	// Undo extra sub of 4*VL, then sub 16.
 .endif
 
 	// En/decrypt any remaining full blocks, one at a time.
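These two hunks fold each "undo the extra sub" into the next loop's initial subtraction: the main loop leaves LEN biased by -4*VL, and instead of adding 4*VL back and then subtracting VL, the remainder path does a single +3*VL, then +VL-16, then +16. The sketch below (hypothetical names, VL fixed at 32 for concreteness) walks the same biased counter through all three loop levels and checks that every byte is still accounted for:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define VL 32	/* assumed vector length; the asm supports several */

/*
 * Sketch of the biased length counter.  Returns the bytes handled as full
 * blocks plus the final 0..15-byte partial, which must equal the input.
 */
static size_t process(size_t len)
{
	size_t done = 0;
	intptr_t n = (intptr_t)len - 4 * VL;	/* bias for the main loop */

	while (n >= 0) {		/* 4 vectors at a time */
		done += 4 * VL;
		n -= 4 * VL;
	}
	n += 3 * VL;			/* undo extra sub of 4*VL, then sub VL */
	while (n >= 0) {		/* 1 vector at a time */
		done += VL;
		n -= VL;
	}
	n += VL - 16;			/* undo extra sub of VL, then sub 16 */
	while (n >= 0) {		/* 1 block at a time */
		done += 16;
		n -= 16;
	}
	n += 16;			/* undo extra sub of 16; n is now 0..15 */
	return done + (size_t)n;
}

int main(void)
{
	for (size_t len = 16; len < 400; len++)
		if (process(len) != len)
			printf("mismatch at %zu\n", len);
	printf("all lengths check out\n");
	return 0;
}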
@@ -694,24 +693,12 @@
 	sub	$16, LEN
 	jge	.Lblock_at_a_time\@
 .Lblock_at_a_time_done\@:
-	add	$16, LEN	// Undo the extra sub from earlier.
-
-.Lfull_blocks_done\@:
-	// Now 0 <= LEN <= 15.  If LEN is nonzero, do ciphertext stealing to
-	// process the last 16 + LEN bytes.  If LEN is zero, we're done.
-	test	LEN, LEN
-	jnz	.Lcts\@
-	jmp	.Ldone\@
-
-.if !\enc
-.Lneed_cts_dec\@:
-	sub	$16, LEN
-	jmp	.Lxts_init\@
-.endif
+	add	$16, LEN	// Undo the extra sub of 16.
+	// Now 0 <= LEN <= 15.  If LEN is zero, we're done.
+	jz	.Ldone\@
 
-.Lcts\@:
-	// Do ciphertext stealing (CTS) to en/decrypt the last full block and
-	// the partial block.  TWEAK0_XMM contains the next tweak.
+	// Otherwise 1 <= LEN <= 15, but the real remaining length is 16 + LEN.
+	// Do ciphertext stealing to process the last 16 + LEN bytes.
 
 .if \enc
 	// If encrypting, the main loop already encrypted the last full block to
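This last hunk streamlines the tail: `add $16, LEN` already sets the zero flag, so the separate `test LEN, LEN` / `jnz .Lcts\@` / `jmp .Ldone\@` sequence and the `.Lcts\@` label collapse into a single `jz .Ldone\@` that falls through into the stealing code. For reference, here is a toy sketch of the encrypt-side ciphertext-stealing step itself; it is not the kernel's code: block_crypt() is a stand-in for one full AES-XTS block (tweak XOR, AES, tweak XOR) so the example runs, and decryption additionally consumes the last two tweaks in reverse order, per the comment in the first hunk:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Toy stand-in for one AES-XTS block so this sketch is runnable. */
static void block_crypt(uint8_t out[16], const uint8_t in[16], uint8_t key)
{
	for (int i = 0; i < 16; i++)
		out[i] = in[i] ^ key;
}

/*
 * Encrypt the last 16 + partial bytes (1 <= partial <= 15) with ciphertext
 * stealing: encrypt the last full block, steal the head of its ciphertext
 * as the final partial output, and pad the partial plaintext with the
 * leftover ciphertext tail before encrypting it as the second-to-last block.
 */
static void cts_encrypt_tail(uint8_t *dst, const uint8_t *src,
			     size_t partial, uint8_t key)
{
	uint8_t last_full[16], padded[16];

	block_crypt(last_full, src, key);		/* last full block   */
	memcpy(dst + 16, last_full, partial);		/* stolen head       */
	memcpy(padded, src + 16, partial);		/* partial plaintext */
	memcpy(padded + partial, last_full + partial, 16 - partial);
	block_crypt(dst, padded, key);			/* 2nd-to-last block */
}

int main(void)
{
	uint8_t src[23] = "0123456789abcdefghijkl";	/* 16 + 7 bytes */
	uint8_t dst[23];

	cts_encrypt_tail(dst, src, sizeof(src) - 16, 0x5a);
	printf("tail handled %zu bytes\n", sizeof(src));
	return 0;
}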