|
537 | 537 | add $7*16, KEY
|
538 | 538 | .else
|
539 | 539 | add $(15+7)*16, KEY
|
540 |
| -.endif |
541 | 540 |
|
542 |
| - // Check whether the data length is a multiple of the AES block length. |
| 541 | + // When decrypting a message whose length isn't a multiple of the AES |
| 542 | + // block length, exclude the last full block from the main loop by |
| 543 | + // subtracting 16 from LEN. This is needed because ciphertext stealing |
| 544 | + // decryption uses the last two tweaks in reverse order. We'll handle |
| 545 | + // the last full block and the partial block specially at the end. |
543 | 546 | test $15, LEN
|
544 |
| - jnz .Lneed_cts\@ |
| 547 | + jnz .Lneed_cts_dec\@ |
545 | 548 | .Lxts_init\@:
|
| 549 | +.endif |
546 | 550 |
|
547 | 551 | // Cache as many round keys as possible.
|
548 | 552 | _load_round_keys
|
|
685 | 689 | _vaes_4x \enc, 1, 12
|
686 | 690 | jmp .Lencrypt_4x_done\@
|
687 | 691 |
|
688 |
| -.Lneed_cts\@: |
689 |
| - // The data length isn't a multiple of the AES block length, so |
690 |
| - // ciphertext stealing (CTS) will be needed. Subtract one block from |
691 |
| - // LEN so that the main loop doesn't process the last full block. The |
692 |
| - // CTS step will process it specially along with the partial block. |
| 692 | +.if !\enc |
| 693 | +.Lneed_cts_dec\@: |
693 | 694 | sub $16, LEN
|
694 | 695 | jmp .Lxts_init\@
|
| 696 | +.endif |
695 | 697 |
|
696 | 698 | .Lcts\@:
|
697 | 699 | // Do ciphertext stealing (CTS) to en/decrypt the last full block and
|
698 |
| - // the partial block. CTS needs two tweaks. TWEAK0_XMM contains the |
699 |
| - // next tweak; compute the one after that. Decryption uses these two |
700 |
| - // tweaks in reverse order, so also define aliases to handle that. |
701 |
| - _next_tweak TWEAK0_XMM, %xmm0, TWEAK1_XMM |
| 700 | + // the partial block. TWEAK0_XMM contains the next tweak. |
| 701 | + |
702 | 702 | .if \enc
|
703 |
| - .set CTS_TWEAK0, TWEAK0_XMM |
704 |
| - .set CTS_TWEAK1, TWEAK1_XMM |
| 703 | + // If encrypting, the main loop already encrypted the last full block to |
| 704 | + // create the CTS intermediate ciphertext. Prepare for the rest of CTS |
| 705 | + // by rewinding the pointers and loading the intermediate ciphertext. |
| 706 | + sub $16, SRC |
| 707 | + sub $16, DST |
| 708 | + vmovdqu (DST), %xmm0 |
705 | 709 | .else
|
706 |
| - .set CTS_TWEAK0, TWEAK1_XMM |
707 |
| - .set CTS_TWEAK1, TWEAK0_XMM |
708 |
| -.endif |
709 |
| - |
710 |
| - // En/decrypt the last full block. |
| 710 | + // If decrypting, the main loop didn't decrypt the last full block |
| 711 | + // because CTS decryption uses the last two tweaks in reverse order. |
| 712 | + // Do it now by advancing the tweak and decrypting the last full block. |
| 713 | + _next_tweak TWEAK0_XMM, %xmm0, TWEAK1_XMM |
711 | 714 | vmovdqu (SRC), %xmm0
|
712 |
| - _aes_crypt \enc, _XMM, CTS_TWEAK0, %xmm0 |
| 715 | + _aes_crypt \enc, _XMM, TWEAK1_XMM, %xmm0 |
| 716 | +.endif |
713 | 717 |
|
714 | 718 | .if USE_AVX10
|
715 | 719 | // Create a mask that has the first LEN bits set.
|
716 | 720 | mov $-1, %rax
|
717 | 721 | bzhi LEN, %rax, %rax
|
718 | 722 | kmovq %rax, %k1
|
719 | 723 |
|
720 |
| - // Swap the first LEN bytes of the above result with the partial block. |
721 |
| - // Note that to support in-place en/decryption, the load from the src |
722 |
| - // partial block must happen before the store to the dst partial block. |
| 724 | + // Swap the first LEN bytes of the en/decryption of the last full block |
| 725 | + // with the partial block. Note that to support in-place en/decryption, |
| 726 | + // the load from the src partial block must happen before the store to |
| 727 | + // the dst partial block. |
723 | 728 | vmovdqa %xmm0, %xmm1
|
724 | 729 | vmovdqu8 16(SRC), %xmm0{%k1}
|
725 | 730 | vmovdqu8 %xmm1, 16(DST){%k1}
|
|
750 | 755 | vpblendvb %xmm3, %xmm0, %xmm1, %xmm0
|
751 | 756 | .endif
|
752 | 757 | // En/decrypt again and store the last full block.
|
753 |
| - _aes_crypt \enc, _XMM, CTS_TWEAK1, %xmm0 |
| 758 | + _aes_crypt \enc, _XMM, TWEAK0_XMM, %xmm0 |
754 | 759 | vmovdqu %xmm0, (DST)
|
755 | 760 | jmp .Ldone\@
|
756 | 761 | .endm
|
|
0 commit comments