4
4
*
5
5
* Copyright (c) 2017-2018 Ondrej Mosnacek <[email protected] >
6
6
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
7
+ * Copyright 2024 Google LLC
7
8
*/
8
9
9
10
#include <linux/linkage.h>
28
29
.byte 0xdb , 0x3d , 0x18 , 0x55 , 0x6d , 0xc2 , 0x2f , 0xf1
29
30
.byte 0x20 , 0x11 , 0x31 , 0x42 , 0x73 , 0xb5 , 0x28 , 0xdd
30
31
31
- .section .rodata.cst16.aegis128_counter , "aM" , @progbits , 16
32
- .align 16
33
- .Laegis128_counter :
34
- .byte 0x00 , 0x01 , 0x02 , 0x03 , 0x04 , 0x05 , 0x06 , 0x07
35
- .byte 0x08 , 0x09 , 0x0a , 0x0b , 0x0c , 0x0d , 0x0e , 0x0f
32
+ .section .rodata.cst32.zeropad_mask , "aM" , @progbits , 32
33
+ .align 32
34
+ .Lzeropad_mask :
35
+ .octa 0xffffffffffffffffffffffffffffffff
36
+ .octa 0
36
37
37
38
.text
38
39
55
56
.endm
56
57
57
58
/*
58
- * __load_partial: internal ABI
59
- * input:
60
- * LEN - bytes
61
- * SRC - src
62
- * output:
63
- * MSG - message block
64
- * changed:
65
- * T0
66
- * %r8
67
- * %r9
59
+ * Load 1 <= LEN (%ecx) <= 15 bytes from the pointer SRC into the xmm register
60
+ * MSG and zeroize any remaining bytes. Clobbers %rax, %rcx, and %r8.
68
61
*/
69
- SYM_FUNC_START_LOCAL (__load_partial)
70
- .set LEN, %ecx
71
- .set SRC, %rsi
72
- xor %r9d , %r9d
73
- pxor MSG, MSG
74
-
75
- mov LEN, %r8d
76
- and $0x1 , %r8
77
- jz .Lld_partial_1
78
-
79
- mov LEN, %r8d
80
- and $0x1E , %r8
81
- add SRC, %r8
82
- mov (%r8 ), %r9b
83
-
84
- .Lld_partial_1:
85
- mov LEN, %r8d
86
- and $0x2 , %r8
87
- jz .Lld_partial_2
88
-
89
- mov LEN, %r8d
90
- and $0x1C , %r8
91
- add SRC, %r8
92
- shl $0x10 , %r9
93
- mov (%r8 ), %r9w
94
-
95
- .Lld_partial_2:
96
- mov LEN, %r8d
97
- and $0x4 , %r8
98
- jz .Lld_partial_4
99
-
100
- mov LEN, %r8d
101
- and $0x18 , %r8
102
- add SRC, %r8
103
- shl $32 , %r9
104
- mov (%r8 ), %r8d
105
- xor %r8 , %r9
106
-
107
- .Lld_partial_4:
108
- movq %r9 , MSG
109
-
110
- mov LEN, %r8d
111
- and $0x8 , %r8
112
- jz .Lld_partial_8
113
-
114
- mov LEN, %r8d
115
- and $0x10 , %r8
116
- add SRC, %r8
117
- pslldq $8 , MSG
118
- movq (%r8 ), T0
119
- pxor T0, MSG
120
-
121
- .Lld_partial_8:
122
- RET
123
- SYM_FUNC_END(__load_partial)
62
+ .macro load_partial /* In: SRC, LEN in %ecx (1..15). Out: MSG, zero-padded. */
63
+ sub $8 , %ecx /* %ecx = LEN - 8 */
64
+ jle .Lle8\@ /* LEN <= 8: handled with 4-byte or smaller loads */
65
+
66
+ /* Load 9 <= LEN <= 15 bytes: */
67
+ movq (SRC), MSG /* Load first 8 bytes; movq zeroes the upper half of MSG */
68
+ mov (SRC, %rcx ), %rax /* Load last 8 bytes, i.e. bytes LEN-8 .. LEN-1 */
69
+ neg %ecx /* %ecx = 8 - LEN */
70
+ shl $3 , %ecx /* Count is taken mod 64 by shr, so %cl acts as 8*(16 - LEN) bits */
71
+ shr %cl , %rax /* Discard overlapping bytes (the 16 - LEN bytes already loaded above) */
72
+ pinsrq $1 , %rax , MSG /* Upper half of MSG = bytes 8 .. LEN-1, zero-padded */
73
+ jmp .Ldone\@
74
+
75
+ .Lle8\@:
76
+ add $4 , %ecx /* %ecx = LEN - 4 */
77
+ jl .Llt4\@ /* LEN < 4 */
78
+
79
+ /* Load 4 <= LEN <= 8 bytes: */
80
+ mov (SRC), %eax /* Load first 4 bytes */
81
+ mov (SRC, %rcx ), %r8d /* Load last 4 bytes, from byte offset LEN - 4 */
82
+ jmp .Lcombine\@ /* %ecx = LEN - 4 = byte offset of the last part */
83
+
84
+ .Llt4\@:
85
+ /* Load 1 <= LEN <= 3 bytes: */
86
+ add $2 , %ecx /* %ecx = LEN - 2 */
87
+ movzbl (SRC), %eax /* Load first byte; movzbl leaves the flags from the add intact */
88
+ jl .Lmovq\@ /* LEN == 1: the first byte is the whole message */
89
+ movzwl (SRC, %rcx ), %r8d /* Load last 2 bytes, from byte offset LEN - 2 */
90
+ .Lcombine\@: /* Here %ecx = byte offset of the part held in %r8 */
91
+ shl $3 , %ecx /* Byte offset -> bit offset */
92
+ shl %cl , %r8 /* Move the last part to its position within the 64-bit value */
93
+ or %r8 , %rax /* Combine the two parts; bytes present in both are identical */
94
+ .Lmovq\@:
95
+ movq %rax , MSG /* Low half of MSG = data; movq zeroes the upper half */
96
+ .Ldone\@:
97
+ .endm
124
98
125
99
/*
126
- * __store_partial: internal ABI
127
- * input:
128
- * LEN - bytes
129
- * DST - dst
130
- * output:
131
- * T0 - message block
132
- * changed:
133
- * %r8
134
- * %r9
135
- * %r10
100
+ * Store 1 <= LEN (%ecx) <= 15 bytes from the xmm register \msg to the pointer
101
+ * DST. Clobbers %rax, %rcx, and %r8.
136
102
*/
137
- SYM_FUNC_START_LOCAL (__store_partial)
138
- .set LEN, %ecx
139
- .set DST, %rdx
140
- mov LEN, %r8d
141
- mov DST, %r9
142
-
143
- movq T0, %r10
144
-
145
- cmp $8 , %r8
146
- jl .Lst_partial_8
147
-
148
- mov %r10 , (%r9 )
149
- psrldq $8 , T0
150
- movq T0, %r10
151
-
152
- sub $8 , %r8
153
- add $8 , %r9
154
-
155
- .Lst_partial_8:
156
- cmp $4 , %r8
157
- jl .Lst_partial_4
158
-
159
- mov %r10d , (%r9 )
160
- shr $32 , %r10
161
-
162
- sub $4 , %r8
163
- add $4 , %r9
164
-
165
- .Lst_partial_4:
166
- cmp $2 , %r8
167
- jl .Lst_partial_2
168
-
169
- mov %r10w , (%r9 )
170
- shr $0x10 , %r10
171
-
172
- sub $2 , %r8
173
- add $2 , %r9
174
-
175
- .Lst_partial_2:
176
- cmp $1 , %r8
177
- jl .Lst_partial_1
178
-
179
- mov %r10b , (%r9 )
180
-
181
- .Lst_partial_1:
182
- RET
183
- SYM_FUNC_END(__store_partial)
103
+ .macro store_partial msg /* In: \msg (xmm), DST, LEN in %ecx (1..15). Writes exactly LEN bytes. */
104
+ sub $8 , %ecx /* %ecx = LEN - 8 */
105
+ jl .Llt8\@ /* LEN < 8 */
106
+
107
+ /* Store 8 <= LEN <= 15 bytes: */
108
+ pextrq $1 , \msg, %rax /* %rax = bytes 8..15 of the block */
109
+ mov %ecx , %r8d /* %r8 = LEN - 8, byte offset of the second store */
110
+ shl $3 , %ecx /* Rotate count in bits: 8*(LEN - 8) */
111
+ ror %cl , %rax /* Bytes 8 .. LEN-1 now occupy the top LEN - 8 bytes of %rax */
112
+ mov %rax , (DST, %r8 ) /* Store last LEN - 8 bytes; the low bytes written here land below DST+8 */
113
+ movq \msg, (DST) /* Store first 8 bytes, overwriting the filler from the store above */
114
+ jmp .Ldone\@
115
+
116
+ .Llt8\@:
117
+ add $4 , %ecx /* %ecx = LEN - 4 */
118
+ jl .Llt4\@ /* LEN < 4 */
119
+
120
+ /* Store 4 <= LEN <= 7 bytes: */
121
+ pextrd $1 , \msg, %eax /* %eax = bytes 4..7 of the block */
122
+ mov %ecx , %r8d /* %r8 = LEN - 4, byte offset of the second store */
123
+ shl $3 , %ecx /* Rotate count in bits: 8*(LEN - 4) */
124
+ ror %cl , %eax /* Bytes 4 .. LEN-1 now occupy the top LEN - 4 bytes of %eax */
125
+ mov %eax , (DST, %r8 ) /* Store last LEN - 4 bytes; the low bytes written here land below DST+4 */
126
+ movd \msg, (DST) /* Store first 4 bytes, overwriting the filler from the store above */
127
+ jmp .Ldone\@
128
+
129
+ .Llt4\@:
130
+ /* Store 1 <= LEN <= 3 bytes: */
131
+ pextrb $0 , \msg, 0 (DST) /* Byte 0 is always stored */
132
+ cmp $-2 , %ecx /* LEN - 4 == -2, i.e. LEN == 2? */
133
+ jl .Ldone\@ /* LEN == 1 */
134
+ pextrb $1 , \msg, 1 (DST) /* pextrb leaves the flags from the cmp intact for the je below */
135
+ je .Ldone\@ /* LEN == 2 */
136
+ pextrb $2 , \msg, 2 (DST) /* LEN == 3 */
137
+ .Ldone\@:
138
+ .endm
184
139
185
140
/*
186
141
* void aegis128_aesni_init(struct aegis_state *state,
@@ -453,7 +408,7 @@ SYM_FUNC_START(aegis128_aesni_enc_tail)
453
408
.set STATEP, %rdi
454
409
.set SRC, %rsi
455
410
.set DST, %rdx
456
- .set LEN, %ecx
411
+ .set LEN, %ecx /* {load,store}_partial rely on this being %ecx */
457
412
FRAME_BEGIN
458
413
459
414
/* load the state: */
@@ -464,7 +419,8 @@ SYM_FUNC_START(aegis128_aesni_enc_tail)
464
419
movdqu 0x40 (STATEP), STATE4
465
420
466
421
/* encrypt message: */
467
- call __load_partial
422
+ mov LEN, %r9d
423
+ load_partial
468
424
469
425
movdqa MSG, T0
470
426
pxor STATE1, T0
@@ -473,7 +429,8 @@ SYM_FUNC_START(aegis128_aesni_enc_tail)
473
429
pand STATE3, T1
474
430
pxor T1, T0
475
431
476
- call __store_partial
432
+ mov %r9d , LEN
433
+ store_partial T0
477
434
478
435
aegis128_update
479
436
pxor MSG, STATE4
@@ -598,7 +555,7 @@ SYM_FUNC_START(aegis128_aesni_dec_tail)
598
555
.set STATEP, %rdi
599
556
.set SRC, %rsi
600
557
.set DST, %rdx
601
- .set LEN, %ecx
558
+ .set LEN, %ecx /* {load,store}_partial rely on this being %ecx */
602
559
FRAME_BEGIN
603
560
604
561
/* load the state: */
@@ -609,25 +566,22 @@ SYM_FUNC_START(aegis128_aesni_dec_tail)
609
566
movdqu 0x40 (STATEP), STATE4
610
567
611
568
/* decrypt message: */
612
- call __load_partial
569
+ mov LEN, %r9d
570
+ load_partial
613
571
614
572
pxor STATE1, MSG
615
573
pxor STATE4, MSG
616
574
movdqa STATE2, T1
617
575
pand STATE3, T1
618
576
pxor T1, MSG
619
577
620
- movdqa MSG, T0
621
- call __store_partial
578
+ mov %r9d , LEN
579
+ store_partial MSG
622
580
623
581
/* mask with byte count: */
624
- movd LEN, T0
625
- punpcklbw T0, T0
626
- punpcklbw T0, T0
627
- punpcklbw T0, T0
628
- punpcklbw T0, T0
629
- movdqa .Laegis128_counter(%rip ), T1
630
- pcmpgtb T1, T0
582
+ lea .Lzeropad_mask+16 (%rip ), %rax
583
+ sub %r9 , %rax
584
+ movdqu (%rax ), T0
631
585
pand T0, MSG
632
586
633
587
aegis128_update
0 commit comments