Skip to content

Commit 595bca2

Browse files
ebiggers authored and herbertx committed
crypto: x86/aegis128 - don't bother with special code for aligned data
Remove the AEGIS assembly code paths that were "optimized" to operate on 16-byte aligned data using movdqa, and instead just use the code paths that use movdqu and can handle data with any alignment.

This does not reduce performance. movdqa is basically a historical artifact; on aligned data, movdqu and movdqa have had the same performance since Intel Nehalem (2008) and AMD Bulldozer (2011). And code that requires AES-NI cannot run on CPUs older than those anyway.

Reviewed-by: Ondrej Mosnacek <[email protected]>
Signed-off-by: Eric Biggers <[email protected]>
Signed-off-by: Herbert Xu <[email protected]>
1 parent b8d2e7b commit 595bca2

File tree

1 file changed

+22
-100
lines changed

1 file changed

+22
-100
lines changed

arch/x86/crypto/aegis128-aesni-asm.S

Lines changed: 22 additions & 100 deletions
Original file line number | Diff line number | Diff line change
@@ -245,52 +245,8 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
245245
movdqu 0x30(STATEP), STATE3
246246
movdqu 0x40(STATEP), STATE4
247247

248-
mov SRC, %r8
249-
and $0xF, %r8
250-
jnz .Lad_u_loop
251-
252-
.align 8
253-
.Lad_a_loop:
254-
movdqa 0x00(SRC), MSG
255-
aegis128_update
256-
pxor MSG, STATE4
257-
sub $0x10, LEN
258-
cmp $0x10, LEN
259-
jl .Lad_out_1
260-
261-
movdqa 0x10(SRC), MSG
262-
aegis128_update
263-
pxor MSG, STATE3
264-
sub $0x10, LEN
265-
cmp $0x10, LEN
266-
jl .Lad_out_2
267-
268-
movdqa 0x20(SRC), MSG
269-
aegis128_update
270-
pxor MSG, STATE2
271-
sub $0x10, LEN
272-
cmp $0x10, LEN
273-
jl .Lad_out_3
274-
275-
movdqa 0x30(SRC), MSG
276-
aegis128_update
277-
pxor MSG, STATE1
278-
sub $0x10, LEN
279-
cmp $0x10, LEN
280-
jl .Lad_out_4
281-
282-
movdqa 0x40(SRC), MSG
283-
aegis128_update
284-
pxor MSG, STATE0
285-
sub $0x10, LEN
286-
cmp $0x10, LEN
287-
jl .Lad_out_0
288-
289-
add $0x50, SRC
290-
jmp .Lad_a_loop
291-
292248
.align 8
293-
.Lad_u_loop:
249+
.Lad_loop:
294250
movdqu 0x00(SRC), MSG
295251
aegis128_update
296252
pxor MSG, STATE4
@@ -327,7 +283,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
327283
jl .Lad_out_0
328284

329285
add $0x50, SRC
330-
jmp .Lad_u_loop
286+
jmp .Lad_loop
331287

332288
/* store the state: */
333289
.Lad_out_0:
@@ -380,15 +336,15 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
380336
RET
381337
SYM_FUNC_END(crypto_aegis128_aesni_ad)
382338

383-
.macro encrypt_block a s0 s1 s2 s3 s4 i
384-
movdq\a (\i * 0x10)(SRC), MSG
339+
.macro encrypt_block s0 s1 s2 s3 s4 i
340+
movdqu (\i * 0x10)(SRC), MSG
385341
movdqa MSG, T0
386342
pxor \s1, T0
387343
pxor \s4, T0
388344
movdqa \s2, T1
389345
pand \s3, T1
390346
pxor T1, T0
391-
movdq\a T0, (\i * 0x10)(DST)
347+
movdqu T0, (\i * 0x10)(DST)
392348

393349
aegis128_update
394350
pxor MSG, \s4
@@ -415,34 +371,17 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc)
415371
movdqu 0x30(STATEP), STATE3
416372
movdqu 0x40(STATEP), STATE4
417373

418-
mov SRC, %r8
419-
or DST, %r8
420-
and $0xF, %r8
421-
jnz .Lenc_u_loop
422-
423374
.align 8
424-
.Lenc_a_loop:
425-
encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
426-
encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
427-
encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
428-
encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
429-
encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
375+
.Lenc_loop:
376+
encrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
377+
encrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
378+
encrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
379+
encrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
380+
encrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4
430381

431382
add $0x50, SRC
432383
add $0x50, DST
433-
jmp .Lenc_a_loop
434-
435-
.align 8
436-
.Lenc_u_loop:
437-
encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
438-
encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
439-
encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
440-
encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
441-
encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
442-
443-
add $0x50, SRC
444-
add $0x50, DST
445-
jmp .Lenc_u_loop
384+
jmp .Lenc_loop
446385

447386
/* store the state: */
448387
.Lenc_out_0:
@@ -535,14 +474,14 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc_tail)
535474
RET
536475
SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
537476

538-
.macro decrypt_block a s0 s1 s2 s3 s4 i
539-
movdq\a (\i * 0x10)(SRC), MSG
477+
.macro decrypt_block s0 s1 s2 s3 s4 i
478+
movdqu (\i * 0x10)(SRC), MSG
540479
pxor \s1, MSG
541480
pxor \s4, MSG
542481
movdqa \s2, T1
543482
pand \s3, T1
544483
pxor T1, MSG
545-
movdq\a MSG, (\i * 0x10)(DST)
484+
movdqu MSG, (\i * 0x10)(DST)
546485

547486
aegis128_update
548487
pxor MSG, \s4
@@ -569,34 +508,17 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec)
569508
movdqu 0x30(STATEP), STATE3
570509
movdqu 0x40(STATEP), STATE4
571510

572-
mov SRC, %r8
573-
or DST, %r8
574-
and $0xF, %r8
575-
jnz .Ldec_u_loop
576-
577-
.align 8
578-
.Ldec_a_loop:
579-
decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
580-
decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
581-
decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
582-
decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
583-
decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
584-
585-
add $0x50, SRC
586-
add $0x50, DST
587-
jmp .Ldec_a_loop
588-
589511
.align 8
590-
.Ldec_u_loop:
591-
decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
592-
decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
593-
decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
594-
decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
595-
decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
512+
.Ldec_loop:
513+
decrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
514+
decrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
515+
decrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
516+
decrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
517+
decrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4
596518

597519
add $0x50, SRC
598520
add $0x50, DST
599-
jmp .Ldec_u_loop
521+
jmp .Ldec_loop
600522

601523
/* store the state: */
602524
.Ldec_out_0:

0 commit comments

Comments
 (0)