@@ -245,52 +245,8 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
245
245
movdqu 0x30 (STATEP), STATE3
246
246
movdqu 0x40 (STATEP), STATE4
247
247
248
- mov SRC, %r8
249
- and $0xF , %r8
250
- jnz .Lad_u_loop
251
-
252
- .align 8
253
- .Lad_a_loop:
254
- movdqa 0x00 (SRC), MSG
255
- aegis128_update
256
- pxor MSG, STATE4
257
- sub $0x10 , LEN
258
- cmp $0x10 , LEN
259
- jl .Lad_out_1
260
-
261
- movdqa 0x10 (SRC), MSG
262
- aegis128_update
263
- pxor MSG, STATE3
264
- sub $0x10 , LEN
265
- cmp $0x10 , LEN
266
- jl .Lad_out_2
267
-
268
- movdqa 0x20 (SRC), MSG
269
- aegis128_update
270
- pxor MSG, STATE2
271
- sub $0x10 , LEN
272
- cmp $0x10 , LEN
273
- jl .Lad_out_3
274
-
275
- movdqa 0x30 (SRC), MSG
276
- aegis128_update
277
- pxor MSG, STATE1
278
- sub $0x10 , LEN
279
- cmp $0x10 , LEN
280
- jl .Lad_out_4
281
-
282
- movdqa 0x40 (SRC), MSG
283
- aegis128_update
284
- pxor MSG, STATE0
285
- sub $0x10 , LEN
286
- cmp $0x10 , LEN
287
- jl .Lad_out_0
288
-
289
- add $0x50 , SRC
290
- jmp .Lad_a_loop
291
-
292
248
.align 8
293
- .Lad_u_loop :
249
+ .Lad_loop :
294
250
movdqu 0x00 (SRC), MSG
295
251
aegis128_update
296
252
pxor MSG, STATE4
@@ -327,7 +283,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
327
283
jl .Lad_out_0
328
284
329
285
add $0x50 , SRC
330
- jmp .Lad_u_loop
286
+ jmp .Lad_loop
331
287
332
288
/* store the state: */
333
289
.Lad_out_0:
@@ -380,15 +336,15 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
380
336
RET
381
337
SYM_FUNC_END(crypto_aegis128_aesni_ad)
382
338
383
- .macro encrypt_block a s0 s1 s2 s3 s4 i
384
- movdq \a (\i * 0x10 )(SRC), MSG
339
+ .macro encrypt_block s0 s1 s2 s3 s4 i
340
+ movdqu (\i * 0x10 )(SRC), MSG
385
341
movdqa MSG, T0
386
342
pxor \s1, T0
387
343
pxor \s4, T0
388
344
movdqa \s2, T1
389
345
pand \s3, T1
390
346
pxor T1, T0
391
- movdq \a T0, (\i * 0x10 )(DST)
347
+ movdqu T0, (\i * 0x10 )(DST)
392
348
393
349
aegis128_update
394
350
pxor MSG, \s4
@@ -415,34 +371,17 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc)
415
371
movdqu 0x30 (STATEP), STATE3
416
372
movdqu 0x40 (STATEP), STATE4
417
373
418
- mov SRC, %r8
419
- or DST, %r8
420
- and $0xF , %r8
421
- jnz .Lenc_u_loop
422
-
423
374
.align 8
424
- .Lenc_a_loop :
425
- encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
426
- encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
427
- encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
428
- encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
429
- encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
375
+ .Lenc_loop :
376
+ encrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
377
+ encrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
378
+ encrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
379
+ encrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
380
+ encrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4
430
381
431
382
add $0x50 , SRC
432
383
add $0x50 , DST
433
- jmp .Lenc_a_loop
434
-
435
- .align 8
436
- .Lenc_u_loop:
437
- encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
438
- encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
439
- encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
440
- encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
441
- encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
442
-
443
- add $0x50 , SRC
444
- add $0x50 , DST
445
- jmp .Lenc_u_loop
384
+ jmp .Lenc_loop
446
385
447
386
/* store the state: */
448
387
.Lenc_out_0:
@@ -535,14 +474,14 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc_tail)
535
474
RET
536
475
SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
537
476
538
- .macro decrypt_block a s0 s1 s2 s3 s4 i
539
- movdq \a (\i * 0x10 )(SRC), MSG
477
+ .macro decrypt_block s0 s1 s2 s3 s4 i
478
+ movdqu (\i * 0x10 )(SRC), MSG
540
479
pxor \s1, MSG
541
480
pxor \s4, MSG
542
481
movdqa \s2, T1
543
482
pand \s3, T1
544
483
pxor T1, MSG
545
- movdq \a MSG, (\i * 0x10 )(DST)
484
+ movdqu MSG, (\i * 0x10 )(DST)
546
485
547
486
aegis128_update
548
487
pxor MSG, \s4
@@ -569,34 +508,17 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec)
569
508
movdqu 0x30 (STATEP), STATE3
570
509
movdqu 0x40 (STATEP), STATE4
571
510
572
- mov SRC, %r8
573
- or DST, %r8
574
- and $0xF , %r8
575
- jnz .Ldec_u_loop
576
-
577
- .align 8
578
- .Ldec_a_loop:
579
- decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
580
- decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
581
- decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
582
- decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
583
- decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
584
-
585
- add $0x50 , SRC
586
- add $0x50 , DST
587
- jmp .Ldec_a_loop
588
-
589
511
.align 8
590
- .Ldec_u_loop :
591
- decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
592
- decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
593
- decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
594
- decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
595
- decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
512
+ .Ldec_loop :
513
+ decrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
514
+ decrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
515
+ decrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
516
+ decrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
517
+ decrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4
596
518
597
519
add $0x50 , SRC
598
520
add $0x50 , DST
599
- jmp .Ldec_u_loop
521
+ jmp .Ldec_loop
600
522
601
523
/* store the state: */
602
524
.Ldec_out_0:
0 commit comments