Skip to content

Commit 898160c

Browse files
committed
ARMv8: enable PAC and BTI support
Using the description in [1], the LC_ASM_END macro is defined for ARMv8 to enable PAC and BTI support:

$ readelf -n build/libleancrypto.so

Displaying notes found in: .note.gnu.property
  Owner                Data size        Description
  GNU                  0x00000010       NT_GNU_PROPERTY_TYPE_0
      Properties: AArch64 feature: BTI, PAC

The changes in the assembly code only ensure that the PAC instructions are placed at the proper locations; they do not alter the functionality.

This patch, however, disables the PAC/BTI support on Apple because it would still cause a SIGILL there.

[1] https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/enabling-pac-and-bti-on-aarch64

Signed-off-by: Stephan Mueller <smueller@chronox.de>
1 parent 123ffab commit 898160c

File tree

14 files changed

+188
-103
lines changed

14 files changed

+188
-103
lines changed

CHANGES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ Changes 1.2.1-prerelease
99

1010
* Intel assembler: add endbr[64|32] to every function and ensure IBT is enabled
1111

12+
* ARMv8 assembler / ELF: add BTI and PAC support
13+
1214
Changes 1.2.0
1315
* Locking for seeded_rng added so that the caller is no longer required to provide a lock
1416

TODO

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,7 @@
2424
- SHA2 Neon: check whether it works on ARMv7
2525

2626
- RISCV asm: check whether it works for RISCV32
27+
28+
- Apple: enable ARM PAC in assembler_support.h
29+
30+
- ARMv8: enable BTI in assembly code (I need to learn where to place it)

curve25519/src/armv8/X25519-AArch64.S

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,22 @@
4343

4444
#include "assembler_support.h"
4545

46+
#ifdef __APPLE__
47+
#define load64unaligned_macro _load64unaligned
48+
#define load256unaligned_macro _load256unaligned
49+
#else
50+
#define load64unaligned_macro load64unaligned
51+
#define load256unaligned_macro load256unaligned
52+
#endif
53+
4654
.cpu generic+fp+simd
4755
.text
4856
.align 2
4957
5058
// in: x0: pointer
5159
// out: x0: loaded value
52-
.global load64unaligned
53-
load64unaligned:
60+
SYM_FUNC_START(load64unaligned)
61+
SYM_FUNC_ENTER(load64unaligned)
5462
ldrb w1, [x0]
5563
ldrb w2, [x0, #1]
5664
ldrb w3, [x0, #2]
@@ -71,27 +79,28 @@ load64unaligned:
7179
orr x0, x1, x5, lsl #32
7280
7381
RET
82+
SYM_FUNC_END(load64unaligned)
7483

7584
// in: x0: pointer
7685
// out: x0-x3: loaded value
77-
.global load256unaligned
78-
load256unaligned:
86+
SYM_FUNC_START(load256unaligned)
87+
SYM_FUNC_ENTER(load256unaligned)
7988
stp x29, x30, [sp, #-64]!
8089
mov x29, sp
8190
stp x19, x20, [sp, #16]
8291
stp x21, x22, [sp, #32]
8392
8493
mov x19, x0
85-
bl load64unaligned
94+
bl load64unaligned_macro
8695
mov x20, x0
8796
add x0, x19, #8
88-
bl load64unaligned
97+
bl load64unaligned_macro
8998
mov x21, x0
9099
add x0, x19, #16
91-
bl load64unaligned
100+
bl load64unaligned_macro
92101
mov x22, x0
93102
add x0, x19, #24
94-
bl load64unaligned
103+
bl load64unaligned_macro
95104
mov x3, x0
96105
97106
mov x0, x20
@@ -102,6 +111,7 @@ load256unaligned:
102111
ldp x21, x22, [sp, #32]
103112
ldp x29, x30, [sp], #64
104113
RET
114+
SYM_FUNC_END(load256unaligned)
105115

106116
// in: x1: scalar pointer, x2: base point pointer
107117
// out: x0: result pointer
@@ -126,7 +136,7 @@ SYM_FUNC_ENTER(crypto_scalarmult_curve25519_armv8)
126136
mov x19, x2 // point
127137
128138
mov x0, x1 // scalar
129-
bl load256unaligned
139+
bl load256unaligned_macro
130140
131141
and x3, x3, #0x7fffffffffffffff
132142
and x0, x0, #0xfffffffffffffff8
@@ -136,7 +146,7 @@ SYM_FUNC_ENTER(crypto_scalarmult_curve25519_armv8)
136146
stp x2, x3, [sp, #104+16]
137147
138148
mov x0, x19 // point
139-
bl load256unaligned
149+
bl load256unaligned_macro
140150
141151
// Unpack point (discard most significant bit)
142152
lsr x12, x0, #51

hash/src/asm/ARMv8A/KeccakP-1600-armv8a-ce.S

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,6 @@
2727

2828
#include "assembler_support.h"
2929

30-
#define AARCH64_SIGN_LINK_REGISTER
31-
#define AARCH64_VALIDATE_LINK_REGISTER
32-
3330
.text
3431

3532
.align 8 // strategic alignment and padding that allows to use
@@ -66,7 +63,7 @@ SYM_SIZE(iotas)
6663

6764
.align 5
6865
KeccakF1600_int:
69-
AARCH64_SIGN_LINK_REGISTER
66+
LC_ASM_ENTER_HARDENING
7067
adr x28,iotas
7168
stp x28,x30,[sp,#16] // 32 bytes on top are mine
7269
b .Loop
@@ -231,14 +228,12 @@ KeccakF1600_int:
231228
bne .Loop
232229

233230
ldr x30,[sp,#24]
234-
AARCH64_VALIDATE_LINK_REGISTER
235231
RET
236232
SYM_FUNC_END(KeccakF1600_int)
237233

238234
SYM_FUNC_START(lc_keccakf1600_arm_asm)
239235
SYM_FUNC_ENTER(lc_keccakf1600_arm_asm)
240236
.align 5
241-
AARCH64_SIGN_LINK_REGISTER
242237
stp x29,x30,[sp,#-128]!
243238
add x29,sp,#0
244239
stp x19,x20,[sp,#16]
@@ -288,14 +283,12 @@ SYM_FUNC_ENTER(lc_keccakf1600_arm_asm)
288283
ldp x25,x26,[x29,#64]
289284
ldp x27,x28,[x29,#80]
290285
ldp x29,x30,[sp],#128
291-
AARCH64_VALIDATE_LINK_REGISTER
292286
RET
293287
SYM_FUNC_END(lc_keccakf1600_arm_asm)
294288

295289
SYM_FUNC_START(lc_keccak_absorb_arm_asm)
296290
SYM_FUNC_ENTER(lc_keccak_absorb_arm_asm)
297291
.align 5
298-
AARCH64_SIGN_LINK_REGISTER
299292
stp x29,x30,[sp,#-128]!
300293
add x29,sp,#0
301294
stp x19,x20,[sp,#16]
@@ -529,14 +522,12 @@ SYM_FUNC_ENTER(lc_keccak_absorb_arm_asm)
529522
ldp x25,x26,[x29,#64]
530523
ldp x27,x28,[x29,#80]
531524
ldp x29,x30,[sp],#128
532-
AARCH64_VALIDATE_LINK_REGISTER
533525
RET
534526
SYM_FUNC_END(lc_keccak_absorb_arm_asm)
535527

536528
SYM_FUNC_START(lc_keccak_squeeze_arm_asm)
537529
SYM_FUNC_ENTER(lc_keccak_squeeze_arm_asm)
538530
.align 5
539-
AARCH64_SIGN_LINK_REGISTER
540531
stp x29,x30,[sp,#-48]!
541532
add x29,sp,#0
542533
stp x19,x20,[sp,#16]
@@ -599,12 +590,12 @@ SYM_FUNC_ENTER(lc_keccak_squeeze_arm_asm)
599590
ldp x19,x20,[sp,#16]
600591
ldp x21,x22,[sp,#32]
601592
ldp x29,x30,[sp],#48
602-
AARCH64_VALIDATE_LINK_REGISTER
603593
RET
604594
SYM_FUNC_END(lc_keccak_squeeze_arm_asm)
605595

606596
.align 5
607597
KeccakF1600_ce:
598+
LC_ASM_ENTER_HARDENING
608599
mov x9,#24
609600
adr x10,iotas
610601
b .Loop_ce
@@ -707,7 +698,6 @@ SYM_FUNC_END(KeccakF1600_ce)
707698
SYM_FUNC_START(lc_keccakf1600_arm_ce)
708699
SYM_FUNC_ENTER(lc_keccakf1600_arm_ce)
709700
.align 5
710-
AARCH64_SIGN_LINK_REGISTER
711701
stp x29,x30,[sp,#-80]!
712702
add x29,sp,#0
713703
stp d8,d9,[sp,#16] // per ABI requirement
@@ -748,14 +738,12 @@ SYM_FUNC_ENTER(lc_keccakf1600_arm_ce)
748738
ldp d12,d13,[sp,#48]
749739
ldp d14,d15,[sp,#64]
750740
ldr x29,[sp],#80
751-
AARCH64_VALIDATE_LINK_REGISTER
752741
RET
753742
SYM_FUNC_END(lc_keccakf1600_arm_ce)
754743

755744
SYM_FUNC_START(lc_keccak_absorb_arm_ce)
756745
SYM_FUNC_ENTER(lc_keccak_absorb_arm_ce)
757746
.align 5
758-
AARCH64_SIGN_LINK_REGISTER
759747
stp x29,x30,[sp,#-80]!
760748
add x29,sp,#0
761749
stp d8,d9,[sp,#16] // per ABI requirement
@@ -971,14 +959,12 @@ SYM_FUNC_ENTER(lc_keccak_absorb_arm_ce)
971959
ldp d12,d13,[sp,#48]
972960
ldp d14,d15,[sp,#64]
973961
ldp x29,x30,[sp],#80
974-
AARCH64_VALIDATE_LINK_REGISTER
975962
RET
976963
SYM_FUNC_END(lc_keccak_absorb_arm_ce)
977964

978965
SYM_FUNC_START(lc_keccak_squeeze_arm_ce)
979966
SYM_FUNC_ENTER(lc_keccak_squeeze_arm_ce)
980967
.align 5
981-
AARCH64_SIGN_LINK_REGISTER
982968
stp x29,x30,[sp,#-16]!
983969
add x29,sp,#0
984970
mov x9,x0
@@ -1034,7 +1020,6 @@ SYM_FUNC_ENTER(lc_keccak_squeeze_arm_ce)
10341020

10351021
.Lsqueeze_done_ce:
10361022
ldr x29,[sp],#16
1037-
AARCH64_VALIDATE_LINK_REGISTER
10381023
RET
10391024
SYM_FUNC_END(lc_keccak_squeeze_arm_ce)
10401025
.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0

hash/src/asm/ARMv8A/KeccakP-1600-armv8a-neon.S

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ KeccakP1600_Initialize:
298298
st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0], #64
299299
st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0], #64
300300
st1 { v0.d }[0], [x0], #8
301-
ret
301+
RET
302302

303303

304304
// ----------------------------------------------------------------------------
@@ -311,7 +311,7 @@ KeccakP1600_AddByte:
311311
ldrb w3, [x0, x2]
312312
eor w3, w3, w1
313313
strb w3, [x0, x2]
314-
ret
314+
RET
315315

316316

317317
// ----------------------------------------------------------------------------
@@ -356,7 +356,7 @@ KeccakP1600_AddBytes_BytesLoop: // Same thing but go 1 byte at a time
356356
strb w4, [x0], #1
357357
b KeccakP1600_AddBytes_BytesLoop
358358
KeccakP1600_AddBytes_Exit:
359-
ret
359+
RET
360360

361361
// ----------------------------------------------------------------------------
362362
//
@@ -391,7 +391,7 @@ KeccakP1600_OverwriteBytes_BytesLoop:
391391
strb w4, [x0], #1
392392
b KeccakP1600_OverwriteBytes_BytesLoop
393393
KeccakP1600_OverwriteBytes_Exit:
394-
ret
394+
RET
395395

396396

397397
//----------------------------------------------------------------------------
@@ -428,7 +428,7 @@ KeccakP1600_OverwriteWithZeroes_LoopBytes:
428428
strb w2, [x0], #1
429429
b KeccakP1600_OverwriteWithZeroes_LoopBytes
430430
KeccakP1600_OverwriteWithZeroes_Exit:
431-
ret
431+
RET
432432

433433

434434
// ----------------------------------------------------------------------------
@@ -464,7 +464,7 @@ KeccakP1600_ExtractBytes_BytesLoop:
464464
strb w4, [x1], #1
465465
b KeccakP1600_ExtractBytes_BytesLoop
466466
KeccakP1600_ExtractBytes_Exit:
467-
ret
467+
RET
468468

469469

470470
// ----------------------------------------------------------------------------
@@ -509,7 +509,7 @@ KeccakP1600_ExtractAndAddBytes_BytesLoop:
509509
strb w5, [x2], #1
510510
b KeccakP1600_ExtractAndAddBytes_BytesLoop
511511
KeccakP1600_ExtractAndAddBytes_Exit:
512-
ret
512+
RET
513513

514514
// ----------------------------------------------------------------------------
515515
//
@@ -561,6 +561,6 @@ KeccakP1600_Permute_RoundLoop:
561561
bne KeccakP1600_Permute_RoundLoop
562562
KeccakP1600_Permute_Exit:
563563
StoreState
564-
ret
564+
RET
565565

566566
LC_ASM_END

hash/src/asm/ARMv8A/sha2-256-ARMv8.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1006,7 +1006,7 @@ sha256_block_data_order:
10061006
ldp x27,x28,[x29,#10*__SIZEOF_POINTER__]
10071007
ldp x29,x30,[sp],#16*__SIZEOF_POINTER__
10081008
.inst 0xd50323bf // autiasp
1009-
ret
1009+
RET
10101010
.size sha256_block_data_order,.-sha256_block_data_order
10111011
#endif
10121012

hash/src/asm/ARMv8A/sha2-512-ARMv8.S

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
SYM_FUNC_START(sha512_block_neon)
3535
SYM_FUNC_ENTER(sha512_block_neon)
3636
.align 6
37-
.inst 0xd503233f // paciasp
3837
stp x29,x30,[sp,#-16*__SIZEOF_POINTER__]!
3938
add x29,sp,#0
4039

@@ -995,7 +994,6 @@ SYM_FUNC_ENTER(sha512_block_neon)
995994
ldp x25,x26,[x29,#8*__SIZEOF_POINTER__]
996995
ldp x27,x28,[x29,#10*__SIZEOF_POINTER__]
997996
ldp x29,x30,[sp],#16*__SIZEOF_POINTER__
998-
.inst 0xd50323bf // autiasp
999997
RET
1000998
SYM_FUNC_END(sha512_block_neon)
1001999

@@ -1565,5 +1563,4 @@ SYM_FUNC_ENTER(sha512_block_armv8ce)
15651563
RET
15661564
SYM_FUNC_END(sha512_block_armv8ce)
15671565

1568-
15691566
LC_ASM_END

hash/src/asm/ARMv8_2x/keccakx2_armce.S

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
4545
SOFTWARE.
4646
*/
4747

48+
#include "assembler_support.h"
49+
4850
#ifndef __APPLE__
4951
#define __APPLE__ 0
5052
#endif
@@ -53,8 +55,6 @@ SOFTWARE.
5355
#endif
5456
#if (__APPLE__ && __ARM_FEATURE_CRYPTO) || (__ARM_FEATURE_SHA3)
5557

56-
#include "assembler_support.h"
57-
5858
.macro round
5959
; Execute theta, but without xoring into the state yet.
6060
; Compute parities p[i] = a[i] ^ a[5+i] ^ ... ^ a[20+i].
@@ -190,10 +190,10 @@ loop:
190190
ldp d10, d11, [sp], #16
191191
ldp d8, d9, [sp], #16
192192

193-
ret lr
193+
RET lr
194194

195195
SYM_FUNC_END(keccak_f1600x2_armce)
196196

197-
LC_ASM_END
198-
199197
#endif
198+
199+
LC_ASM_END

0 commit comments

Comments
 (0)