Skip to content

Commit 063492c

Browse files
committed
Add aarch64a_soft_nofp variant
Build a soft float multilib variant for aarch64 supporting targets without an FPU. This contains a couple of minor fixes for llvm to enable this; I'll be submitting those upstream. Signed-off-by: Keith Packard <[email protected]>
1 parent 4a2f029 commit 063492c

File tree

3 files changed

+193
-0
lines changed

3 files changed

+193
-0
lines changed

CMakeLists.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,8 @@ set(
291291
${CMAKE_CURRENT_SOURCE_DIR}/patches/llvm-project/0003-Disable-failing-compiler-rt-test.patch
292292
${CMAKE_CURRENT_SOURCE_DIR}/patches/llvm-project/0004-libc-tests-with-picolibc-XFAIL-uses-of-atomics.patch
293293
${CMAKE_CURRENT_SOURCE_DIR}/patches/llvm-project/0005-libc-tests-with-picolibc-mark-two-more-large-tests.patch
294+
${CMAKE_CURRENT_SOURCE_DIR}/patches/llvm-project/0006-libunwind-Support-aarch64-without-FPU.patch
295+
${CMAKE_CURRENT_SOURCE_DIR}/patches/llvm-project/0007-compiler-rt-Support-aarch64-targets-without-FPU.patch
294296
)
295297
FetchContent_Declare(llvmproject
296298
GIT_REPOSITORY https://github.com/llvm/llvm-project.git
@@ -1614,6 +1616,22 @@ add_library_variants_for_cpu(
16141616
RAM_SIZE 0x1000000
16151617
STACK_SIZE 8K
16161618
)
1619+
add_library_variants_for_cpu(
1620+
aarch64a
1621+
SUFFIX soft_nofp
1622+
COMPILE_FLAGS "-march=armv8-a+nofp+nosimd -mabi=aapcs-soft"
1623+
MULTILIB_FLAGS "--target=aarch64-unknown-none-elf -march=armv8-a+nofp+nosimd -mabi=aapcs-soft"
1624+
PICOLIBC_BUILD_TYPE "release"
1625+
QEMU_MACHINE "virt"
1626+
QEMU_CPU "cortex-a57"
1627+
BOOT_FLASH_ADDRESS 0x40000000
1628+
BOOT_FLASH_SIZE 0x1000
1629+
FLASH_ADDRESS 0x40001000
1630+
FLASH_SIZE 0xfff000
1631+
RAM_ADDRESS 0x41000000
1632+
RAM_SIZE 0x1000000
1633+
STACK_SIZE 8K
1634+
)
16171635
# For AArch32, clang uses different defaults for FPU selection than GCC, both
16181636
# when "+fp" or "+fp.dp" are used and when no FPU specifier is provided in
16191637
# "-march=". Using "-mfpu=" explicitly.
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
From dd64908ad215a4f4cc79e3eb507f15b27b04e89f Mon Sep 17 00:00:00 2001
2+
From: Keith Packard <[email protected]>
3+
Date: Fri, 4 Oct 2024 21:06:37 -0700
4+
Subject: [libunwind] Support aarch64 without FPU
5+
6+
Skip save/restore of FPU registers on targets without them.
7+
8+
Signed-off-by: Keith Packard <[email protected]>
9+
---
10+
libunwind/src/UnwindRegistersRestore.S | 4 ++--
11+
libunwind/src/UnwindRegistersSave.S | 2 ++
12+
2 files changed, 4 insertions(+), 2 deletions(-)
13+
14+
diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S
15+
index 180a66582f..1702d016c3 100644
16+
--- a/libunwind/src/UnwindRegistersRestore.S
17+
+++ b/libunwind/src/UnwindRegistersRestore.S
18+
@@ -658,7 +658,7 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto)
19+
ldp x26,x27, [x0, #0x0D0]
20+
ldp x28,x29, [x0, #0x0E0]
21+
ldr x30, [x0, #0x100] // restore pc into lr
22+
-
23+
+#if defined(__ARM_FP) && __ARM_FP != 0
24+
ldp d0, d1, [x0, #0x110]
25+
ldp d2, d3, [x0, #0x120]
26+
ldp d4, d5, [x0, #0x130]
27+
@@ -676,7 +676,7 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto)
28+
ldp d28,d29, [x0, #0x1F0]
29+
ldr d30, [x0, #0x200]
30+
ldr d31, [x0, #0x208]
31+
-
32+
+#endif
33+
// Finally, restore sp. This must be done after the last read from the
34+
// context struct, because it is allocated on the stack, and an exception
35+
// could clobber the de-allocated portion of the stack after sp has been
36+
diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S
37+
index fab234fcd6..a489a8ba6d 100644
38+
--- a/libunwind/src/UnwindRegistersSave.S
39+
+++ b/libunwind/src/UnwindRegistersSave.S
40+
@@ -746,6 +746,7 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
41+
str x1, [x0, #0x0F8]
42+
str x30, [x0, #0x100] // store return address as pc
43+
// skip cpsr
44+
+#if defined(__ARM_FP) && __ARM_FP != 0
45+
stp d0, d1, [x0, #0x110]
46+
stp d2, d3, [x0, #0x120]
47+
stp d4, d5, [x0, #0x130]
48+
@@ -763,6 +764,7 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
49+
stp d28,d29, [x0, #0x1F0]
50+
str d30, [x0, #0x200]
51+
str d31, [x0, #0x208]
52+
+#endif
53+
mov x0, #0 // return UNW_ESUCCESS
54+
ret
55+
56+
--
57+
2.45.2
58+
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
From 04146b444eb70c449a958b246ae4f4e9b52bf4bc Mon Sep 17 00:00:00 2001
2+
From: Keith Packard <[email protected]>
3+
Date: Fri, 4 Oct 2024 21:08:17 -0700
4+
Subject: [compiler-rt] Support aarch64 targets without FPU
5+
6+
Fall back to the old C implementations of various routines when
7+
the target doesn't have an FPU.
8+
9+
Signed-off-by: Keith Packard <[email protected]>
10+
---
11+
.../builtins/aarch64/sme-libc-mem-routines.S | 2 +-
12+
.../lib/builtins/aarch64/sme-libc-routines.c | 77 +++++++++++++++++++
13+
2 files changed, 78 insertions(+), 1 deletion(-)
14+
15+
diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
16+
index 0318d9a6f1..72d87fb4fa 100644
17+
--- a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
18+
+++ b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
19+
@@ -6,7 +6,7 @@
20+
21+
#include "../assembly.h"
22+
23+
-#ifdef __aarch64__
24+
+#if defined(__aarch64__) && __ARM_FP != 0
25+
26+
#define L(l) .L ## l
27+
28+
diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-routines.c b/compiler-rt/lib/builtins/aarch64/sme-libc-routines.c
29+
index 315490e73e..92fb953c03 100644
30+
--- a/compiler-rt/lib/builtins/aarch64/sme-libc-routines.c
31+
+++ b/compiler-rt/lib/builtins/aarch64/sme-libc-routines.c
32+
@@ -1,5 +1,82 @@
33+
#include <stddef.h>
34+
35+
+#if __ARM_FP == 0
36+
+// WARNING: When building the scalar versions of these functions you need to
37+
+// use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang
38+
+// from recognising a loop idiom and planting calls to memcpy!
39+
+
40+
+static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
41+
+ size_t n) __arm_streaming_compatible {
42+
+ unsigned char *destp = (unsigned char *)dest;
43+
+ const unsigned char *srcp = (const unsigned char *)src;
44+
+ for (size_t i = 0; i < n; ++i)
45+
+ destp[i] = srcp[i];
46+
+
47+
+ return dest;
48+
+}
49+
+
50+
+// If dest and src overlap then behaviour is undefined, hence we can add the
51+
+// restrict keywords here. This also matches the definition of the libc memcpy
52+
+// according to the man page.
53+
+void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src,
54+
+ size_t n) __arm_streaming_compatible {
55+
+ return __arm_sc_memcpy_fwd(dest, src, n);
56+
+}
57+
+
58+
+void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
59+
+ unsigned char *destp = (unsigned char *)dest;
60+
+ unsigned char c8 = (unsigned char)c;
61+
+ for (size_t i = 0; i < n; ++i)
62+
+ destp[i] = c8;
63+
+
64+
+ return dest;
65+
+}
66+
+
67+
+static void *__arm_sc_memcpy_rev(void *dest, const void *src,
68+
+ size_t n) __arm_streaming_compatible {
69+
+ unsigned char *destp = (unsigned char *)dest;
70+
+ const unsigned char *srcp = (const unsigned char *)src;
71+
+ // TODO: Improve performance by copying larger chunks in reverse, or by
72+
+ // using SVE.
73+
+ while (n > 0) {
74+
+ --n;
75+
+ destp[n] = srcp[n];
76+
+ }
77+
+ return dest;
78+
+}
79+
+
80+
+// Semantically a memmove is equivalent to the following:
81+
+// 1. Copy the entire contents of src to a temporary array that does not
82+
+// overlap with src or dest.
83+
+// 2. Copy the contents of the temporary array into dest.
84+
+void *__arm_sc_memmove(void *dest, const void *src,
85+
+ size_t n) __arm_streaming_compatible {
86+
+ unsigned char *destp = (unsigned char *)dest;
87+
+ const unsigned char *srcp = (const unsigned char *)src;
88+
+
89+
+ // If src and dest don't overlap then just invoke memcpy
90+
+ if ((srcp > (destp + n)) || (destp > (srcp + n)))
91+
+ return __arm_sc_memcpy_fwd(dest, src, n);
92+
+
93+
+ // Overlap case 1:
94+
+ // src: Low | -> | High
95+
+ // dest: Low | -> | High
96+
+ // Here src is always ahead of dest at a higher address. If we first read a
97+
+ // chunk of data from src we can safely write the same chunk to dest without
98+
+ // corrupting future reads of src.
99+
+ if (srcp > destp)
100+
+ return __arm_sc_memcpy_fwd(dest, src, n);
101+
+
102+
+ // Overlap case 2:
103+
+ // src: Low | -> | High
104+
+ // dest: Low | -> | High
105+
+ // While we're in the overlap region we're always corrupting future reads of
106+
+ // src when writing to dest. An efficient way to do this is to copy the data
107+
+ // in reverse by starting at the highest address.
108+
+ return __arm_sc_memcpy_rev(dest, src, n);
109+
+}
110+
+#endif
111+
+
112+
const void *__arm_sc_memchr(const void *src, int c,
113+
size_t n) __arm_streaming_compatible {
114+
const unsigned char *srcp = (const unsigned char *)src;
115+
--
116+
2.45.2
117+

0 commit comments

Comments
 (0)