|
85 | 85 | // advanced to point to 7th-from-last round key
|
86 | 86 | .set SRC, %rsi // Pointer to next source data
|
87 | 87 | .set DST, %rdx // Pointer to next destination data
|
88 |
| -.set LEN, %rcx // Remaining length in bytes |
| 88 | +.set LEN, %ecx // Remaining length in bytes |
| 89 | +.set LEN8, %cl |
| 90 | +.set LEN64, %rcx |
89 | 91 | .set TWEAK, %r8 // Pointer to next tweak
|
90 | 92 |
|
91 |
| -// %r9 holds the AES key length in bytes. |
92 |
| -.set KEYLEN, %r9d |
93 |
| -.set KEYLEN64, %r9 |
| 93 | +// %rax holds the AES key length in bytes. |
| 94 | +.set KEYLEN, %eax |
| 95 | +.set KEYLEN64, %rax |
94 | 96 |
|
95 |
| -// %rax and %r10-r11 are available as temporaries. |
| 97 | +// %r9-r11 are available as temporaries. %rax may also be used as a temporary, but only before the AES key length is loaded into it. |
96 | 98 |
|
97 | 99 | .macro _define_Vi i
|
98 | 100 | .if VL == 16
|
|
565 | 567 | // subtracting 16 from LEN. This is needed because ciphertext stealing
|
566 | 568 | // decryption uses the last two tweaks in reverse order. We'll handle
|
567 | 569 | // the last full block and the partial block specially at the end.
|
568 |
| - lea -16(LEN), %rax |
569 |
| - test $15, LEN |
570 |
| - cmovnz %rax, LEN |
| 570 | + lea -16(LEN), %eax |
| 571 | + test $15, LEN8 |
| 572 | + cmovnz %eax, LEN |
571 | 573 | .endif
|
572 | 574 |
|
573 | 575 | // Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256).
|
|
650 | 652 | // Check for the uncommon case where the data length isn't a multiple of
|
651 | 653 | // 4*VL. Handle it out-of-line in order to optimize for the common
|
652 | 654 | // case. In the common case, just fall through to the ret.
|
653 |
| - test $4*VL-1, LEN |
| 655 | + test $4*VL-1, LEN8 |
654 | 656 | jnz .Lhandle_remainder\@
|
655 | 657 | .Ldone\@:
|
656 | 658 | // Store the next tweak back to *TWEAK to support continuation calls.
|
|
718 | 720 |
|
719 | 721 | .if USE_AVX10
|
720 | 722 | // Create a mask that has the first LEN bits set.
|
721 |
| - mov $-1, %rax |
722 |
| - bzhi LEN, %rax, %rax |
723 |
| - kmovq %rax, %k1 |
| 723 | + mov $-1, %r9d |
| 724 | + bzhi LEN, %r9d, %r9d |
| 725 | + kmovd %r9d, %k1 |
724 | 726 |
|
725 | 727 | // Swap the first LEN bytes of the en/decryption of the last full block
|
726 | 728 | // with the partial block. Note that to support in-place en/decryption,
|
|
730 | 732 | vmovdqu8 16(SRC), %xmm0{%k1}
|
731 | 733 | vmovdqu8 %xmm1, 16(DST){%k1}
|
732 | 734 | .else
|
733 |
| - lea .Lcts_permute_table(%rip), %rax |
| 735 | + lea .Lcts_permute_table(%rip), %r9 |
734 | 736 |
|
735 | 737 | // Load the src partial block, left-aligned. Note that to support
|
736 | 738 | // in-place en/decryption, this must happen before the store to the dst
|
737 | 739 | // partial block.
|
738 |
| - vmovdqu (SRC, LEN, 1), %xmm1 |
| 740 | + vmovdqu (SRC, LEN64, 1), %xmm1 |
739 | 741 |
|
740 | 742 | // Shift the first LEN bytes of the en/decryption of the last full block
|
741 | 743 | // to the end of a register, then store it to DST+LEN. This stores the
|
742 | 744 | // dst partial block. It also writes to the second part of the dst last
|
743 | 745 | // full block, but that part is overwritten later.
|
744 |
| - vpshufb (%rax, LEN, 1), %xmm0, %xmm2 |
745 |
| - vmovdqu %xmm2, (DST, LEN, 1) |
| 746 | + vpshufb (%r9, LEN64, 1), %xmm0, %xmm2 |
| 747 | + vmovdqu %xmm2, (DST, LEN64, 1) |
746 | 748 |
|
747 | 749 | // Make xmm3 contain [16-LEN,16-LEN+1,...,14,15,0x80,0x80,...].
|
748 |
| - sub LEN, %rax |
749 |
| - vmovdqu 32(%rax), %xmm3 |
| 750 | + sub LEN64, %r9 |
| 751 | + vmovdqu 32(%r9), %xmm3 |
750 | 752 |
|
751 | 753 | // Shift the src partial block to the beginning of its register.
|
752 | 754 | vpshufb %xmm3, %xmm1, %xmm1
|
@@ -795,7 +797,7 @@ SYM_FUNC_END(aes_xts_encrypt_iv)
|
795 | 797 | // instantiated from the above macro. They all have the following prototype:
|
796 | 798 | //
|
797 | 799 | // void (*xts_asm_func)(const struct crypto_aes_ctx *key,
|
798 |
| -// const u8 *src, u8 *dst, size_t len, |
| 800 | +// const u8 *src, u8 *dst, unsigned int len, |
799 | 801 | // u8 tweak[AES_BLOCK_SIZE]);
|
800 | 802 | //
|
801 | 803 | // |key| is the data key. |tweak| contains the next tweak; the encryption of
|
|
0 commit comments