Skip to content

Commit ea9459e

Browse files
ebiggers authored and herbertx committed
crypto: x86/aesni-xts - deduplicate aesni_xts_enc() and aesni_xts_dec()
Since aesni_xts_enc() and aesni_xts_dec() are very similar, generate them from a macro that's passed an argument enc=1 or enc=0. This reduces the length of aesni-intel_asm.S by 112 lines while still producing the exact same object file in both 32-bit and 64-bit mode.

Signed-off-by: Eric Biggers <[email protected]>
Reviewed-by: Ard Biesheuvel <[email protected]>
Signed-off-by: Herbert Xu <[email protected]>
1 parent 1d27e1f commit ea9459e

File tree

1 file changed

+79
-191
lines changed

1 file changed

+79
-191
lines changed

arch/x86/crypto/aesni-intel_asm.S

Lines changed: 79 additions & 191 deletions
Original file line number | Diff line number | Diff line change
@@ -2825,28 +2825,24 @@ SYM_FUNC_END(aesni_ctr_enc)
28252825
.previous
28262826

28272827
/*
2828-
* _aesni_gf128mul_x_ble: internal ABI
2829-
* Multiply in GF(2^128) for XTS IVs
2828+
* _aesni_gf128mul_x_ble: Multiply in GF(2^128) for XTS IVs
28302829
* input:
28312830
* IV: current IV
28322831
* GF128MUL_MASK == mask with 0x87 and 0x01
28332832
* output:
28342833
* IV: next IV
28352834
* changed:
2836-
* CTR: == temporary value
2835+
* KEY: == temporary value
28372836
*/
2838-
#define _aesni_gf128mul_x_ble() \
2839-
pshufd $0x13, IV, KEY; \
2840-
paddq IV, IV; \
2841-
psrad $31, KEY; \
2842-
pand GF128MUL_MASK, KEY; \
2843-
pxor KEY, IV;
2837+
.macro _aesni_gf128mul_x_ble
2838+
pshufd $0x13, IV, KEY
2839+
paddq IV, IV
2840+
psrad $31, KEY
2841+
pand GF128MUL_MASK, KEY
2842+
pxor KEY, IV
2843+
.endm
28442844

2845-
/*
2846-
* void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
2847-
* const u8 *src, unsigned int len, le128 *iv)
2848-
*/
2849-
SYM_FUNC_START(aesni_xts_enc)
2845+
.macro _aesni_xts_crypt enc
28502846
FRAME_BEGIN
28512847
#ifndef __x86_64__
28522848
pushl IVP
@@ -2865,35 +2861,46 @@ SYM_FUNC_START(aesni_xts_enc)
28652861
movups (IVP), IV
28662862

28672863
mov 480(KEYP), KLEN
2864+
.if !\enc
2865+
add $240, KEYP
2866+
2867+
test $15, LEN
2868+
jz .Lxts_loop4\@
2869+
sub $16, LEN
2870+
.endif
28682871

2869-
.Lxts_enc_loop4:
2872+
.Lxts_loop4\@:
28702873
sub $64, LEN
2871-
jl .Lxts_enc_1x
2874+
jl .Lxts_1x\@
28722875

28732876
movdqa IV, STATE1
28742877
movdqu 0x00(INP), IN
28752878
pxor IN, STATE1
28762879
movdqu IV, 0x00(OUTP)
28772880

2878-
_aesni_gf128mul_x_ble()
2881+
_aesni_gf128mul_x_ble
28792882
movdqa IV, STATE2
28802883
movdqu 0x10(INP), IN
28812884
pxor IN, STATE2
28822885
movdqu IV, 0x10(OUTP)
28832886

2884-
_aesni_gf128mul_x_ble()
2887+
_aesni_gf128mul_x_ble
28852888
movdqa IV, STATE3
28862889
movdqu 0x20(INP), IN
28872890
pxor IN, STATE3
28882891
movdqu IV, 0x20(OUTP)
28892892

2890-
_aesni_gf128mul_x_ble()
2893+
_aesni_gf128mul_x_ble
28912894
movdqa IV, STATE4
28922895
movdqu 0x30(INP), IN
28932896
pxor IN, STATE4
28942897
movdqu IV, 0x30(OUTP)
28952898

2899+
.if \enc
28962900
call _aesni_enc4
2901+
.else
2902+
call _aesni_dec4
2903+
.endif
28972904

28982905
movdqu 0x00(OUTP), IN
28992906
pxor IN, STATE1
@@ -2911,17 +2918,17 @@ SYM_FUNC_START(aesni_xts_enc)
29112918
pxor IN, STATE4
29122919
movdqu STATE4, 0x30(OUTP)
29132920

2914-
_aesni_gf128mul_x_ble()
2921+
_aesni_gf128mul_x_ble
29152922

29162923
add $64, INP
29172924
add $64, OUTP
29182925
test LEN, LEN
2919-
jnz .Lxts_enc_loop4
2926+
jnz .Lxts_loop4\@
29202927

2921-
.Lxts_enc_ret_iv:
2928+
.Lxts_ret_iv\@:
29222929
movups IV, (IVP)
29232930

2924-
.Lxts_enc_ret:
2931+
.Lxts_ret\@:
29252932
#ifndef __x86_64__
29262933
popl KLEN
29272934
popl KEYP
@@ -2931,39 +2938,60 @@ SYM_FUNC_START(aesni_xts_enc)
29312938
FRAME_END
29322939
RET
29332940

2934-
.Lxts_enc_1x:
2941+
.Lxts_1x\@:
29352942
add $64, LEN
2936-
jz .Lxts_enc_ret_iv
2943+
jz .Lxts_ret_iv\@
2944+
.if \enc
29372945
sub $16, LEN
2938-
jl .Lxts_enc_cts4
2946+
jl .Lxts_cts4\@
2947+
.endif
29392948

2940-
.Lxts_enc_loop1:
2949+
.Lxts_loop1\@:
29412950
movdqu (INP), STATE
2951+
.if \enc
29422952
pxor IV, STATE
29432953
call _aesni_enc1
2954+
.else
2955+
add $16, INP
2956+
sub $16, LEN
2957+
jl .Lxts_cts1\@
29442958
pxor IV, STATE
2945-
_aesni_gf128mul_x_ble()
2959+
call _aesni_dec1
2960+
.endif
2961+
pxor IV, STATE
2962+
_aesni_gf128mul_x_ble
29462963

29472964
test LEN, LEN
2948-
jz .Lxts_enc_out
2965+
jz .Lxts_out\@
29492966

2967+
.if \enc
29502968
add $16, INP
29512969
sub $16, LEN
2952-
jl .Lxts_enc_cts1
2970+
jl .Lxts_cts1\@
2971+
.endif
29532972

29542973
movdqu STATE, (OUTP)
29552974
add $16, OUTP
2956-
jmp .Lxts_enc_loop1
2975+
jmp .Lxts_loop1\@
29572976

2958-
.Lxts_enc_out:
2977+
.Lxts_out\@:
29592978
movdqu STATE, (OUTP)
2960-
jmp .Lxts_enc_ret_iv
2979+
jmp .Lxts_ret_iv\@
29612980

2962-
.Lxts_enc_cts4:
2981+
.if \enc
2982+
.Lxts_cts4\@:
29632983
movdqa STATE4, STATE
29642984
sub $16, OUTP
2985+
.Lxts_cts1\@:
2986+
.else
2987+
.Lxts_cts1\@:
2988+
movdqa IV, STATE4
2989+
_aesni_gf128mul_x_ble
29652990

2966-
.Lxts_enc_cts1:
2991+
pxor IV, STATE
2992+
call _aesni_dec1
2993+
pxor IV, STATE
2994+
.endif
29672995
#ifndef __x86_64__
29682996
lea .Lcts_permute_table, T1
29692997
#else
@@ -2989,172 +3017,32 @@ SYM_FUNC_START(aesni_xts_enc)
29893017
pblendvb IN2, IN1
29903018
movaps IN1, STATE
29913019

3020+
.if \enc
29923021
pxor IV, STATE
29933022
call _aesni_enc1
29943023
pxor IV, STATE
3024+
.else
3025+
pxor STATE4, STATE
3026+
call _aesni_dec1
3027+
pxor STATE4, STATE
3028+
.endif
29953029

29963030
movups STATE, (OUTP)
2997-
jmp .Lxts_enc_ret
3031+
jmp .Lxts_ret\@
3032+
.endm
3033+
3034+
/*
3035+
* void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
3036+
* const u8 *src, unsigned int len, le128 *iv)
3037+
*/
3038+
SYM_FUNC_START(aesni_xts_enc)
3039+
_aesni_xts_crypt 1
29983040
SYM_FUNC_END(aesni_xts_enc)
29993041

30003042
/*
30013043
* void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *dst,
30023044
* const u8 *src, unsigned int len, le128 *iv)
30033045
*/
30043046
SYM_FUNC_START(aesni_xts_dec)
3005-
FRAME_BEGIN
3006-
#ifndef __x86_64__
3007-
pushl IVP
3008-
pushl LEN
3009-
pushl KEYP
3010-
pushl KLEN
3011-
movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
3012-
movl (FRAME_OFFSET+24)(%esp), OUTP # dst
3013-
movl (FRAME_OFFSET+28)(%esp), INP # src
3014-
movl (FRAME_OFFSET+32)(%esp), LEN # len
3015-
movl (FRAME_OFFSET+36)(%esp), IVP # iv
3016-
movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
3017-
#else
3018-
movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
3019-
#endif
3020-
movups (IVP), IV
3021-
3022-
mov 480(KEYP), KLEN
3023-
add $240, KEYP
3024-
3025-
test $15, LEN
3026-
jz .Lxts_dec_loop4
3027-
sub $16, LEN
3028-
3029-
.Lxts_dec_loop4:
3030-
sub $64, LEN
3031-
jl .Lxts_dec_1x
3032-
3033-
movdqa IV, STATE1
3034-
movdqu 0x00(INP), IN
3035-
pxor IN, STATE1
3036-
movdqu IV, 0x00(OUTP)
3037-
3038-
_aesni_gf128mul_x_ble()
3039-
movdqa IV, STATE2
3040-
movdqu 0x10(INP), IN
3041-
pxor IN, STATE2
3042-
movdqu IV, 0x10(OUTP)
3043-
3044-
_aesni_gf128mul_x_ble()
3045-
movdqa IV, STATE3
3046-
movdqu 0x20(INP), IN
3047-
pxor IN, STATE3
3048-
movdqu IV, 0x20(OUTP)
3049-
3050-
_aesni_gf128mul_x_ble()
3051-
movdqa IV, STATE4
3052-
movdqu 0x30(INP), IN
3053-
pxor IN, STATE4
3054-
movdqu IV, 0x30(OUTP)
3055-
3056-
call _aesni_dec4
3057-
3058-
movdqu 0x00(OUTP), IN
3059-
pxor IN, STATE1
3060-
movdqu STATE1, 0x00(OUTP)
3061-
3062-
movdqu 0x10(OUTP), IN
3063-
pxor IN, STATE2
3064-
movdqu STATE2, 0x10(OUTP)
3065-
3066-
movdqu 0x20(OUTP), IN
3067-
pxor IN, STATE3
3068-
movdqu STATE3, 0x20(OUTP)
3069-
3070-
movdqu 0x30(OUTP), IN
3071-
pxor IN, STATE4
3072-
movdqu STATE4, 0x30(OUTP)
3073-
3074-
_aesni_gf128mul_x_ble()
3075-
3076-
add $64, INP
3077-
add $64, OUTP
3078-
test LEN, LEN
3079-
jnz .Lxts_dec_loop4
3080-
3081-
.Lxts_dec_ret_iv:
3082-
movups IV, (IVP)
3083-
3084-
.Lxts_dec_ret:
3085-
#ifndef __x86_64__
3086-
popl KLEN
3087-
popl KEYP
3088-
popl LEN
3089-
popl IVP
3090-
#endif
3091-
FRAME_END
3092-
RET
3093-
3094-
.Lxts_dec_1x:
3095-
add $64, LEN
3096-
jz .Lxts_dec_ret_iv
3097-
3098-
.Lxts_dec_loop1:
3099-
movdqu (INP), STATE
3100-
3101-
add $16, INP
3102-
sub $16, LEN
3103-
jl .Lxts_dec_cts1
3104-
3105-
pxor IV, STATE
3106-
call _aesni_dec1
3107-
pxor IV, STATE
3108-
_aesni_gf128mul_x_ble()
3109-
3110-
test LEN, LEN
3111-
jz .Lxts_dec_out
3112-
3113-
movdqu STATE, (OUTP)
3114-
add $16, OUTP
3115-
jmp .Lxts_dec_loop1
3116-
3117-
.Lxts_dec_out:
3118-
movdqu STATE, (OUTP)
3119-
jmp .Lxts_dec_ret_iv
3120-
3121-
.Lxts_dec_cts1:
3122-
movdqa IV, STATE4
3123-
_aesni_gf128mul_x_ble()
3124-
3125-
pxor IV, STATE
3126-
call _aesni_dec1
3127-
pxor IV, STATE
3128-
3129-
#ifndef __x86_64__
3130-
lea .Lcts_permute_table, T1
3131-
#else
3132-
lea .Lcts_permute_table(%rip), T1
3133-
#endif
3134-
add LEN, INP /* rewind input pointer */
3135-
add $16, LEN /* # bytes in final block */
3136-
movups (INP), IN1
3137-
3138-
mov T1, IVP
3139-
add $32, IVP
3140-
add LEN, T1
3141-
sub LEN, IVP
3142-
add OUTP, LEN
3143-
3144-
movups (T1), %xmm4
3145-
movaps STATE, IN2
3146-
pshufb %xmm4, STATE
3147-
movups STATE, (LEN)
3148-
3149-
movups (IVP), %xmm0
3150-
pshufb %xmm0, IN1
3151-
pblendvb IN2, IN1
3152-
movaps IN1, STATE
3153-
3154-
pxor STATE4, STATE
3155-
call _aesni_dec1
3156-
pxor STATE4, STATE
3157-
3158-
movups STATE, (OUTP)
3159-
jmp .Lxts_dec_ret
3047+
_aesni_xts_crypt 0
31603048
SYM_FUNC_END(aesni_xts_dec)

0 commit comments

Comments
 (0)