Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,8 @@ set(
${CMAKE_CURRENT_SOURCE_DIR}/patches/llvm-project/0003-Disable-failing-compiler-rt-test.patch
${CMAKE_CURRENT_SOURCE_DIR}/patches/llvm-project/0004-libc-tests-with-picolibc-XFAIL-uses-of-atomics.patch
${CMAKE_CURRENT_SOURCE_DIR}/patches/llvm-project/0005-libc-tests-with-picolibc-mark-two-more-large-tests.patch
${CMAKE_CURRENT_SOURCE_DIR}/patches/llvm-project/0006-libunwind-Support-aarch64-without-FPU.patch
${CMAKE_CURRENT_SOURCE_DIR}/patches/llvm-project/0007-compiler-rt-Support-aarch64-targets-without-FPU.patch
)
FetchContent_Declare(llvmproject
GIT_REPOSITORY https://github.com/llvm/llvm-project.git
Expand Down Expand Up @@ -1614,6 +1616,22 @@ add_library_variants_for_cpu(
RAM_SIZE 0x1000000
STACK_SIZE 8K
)
add_library_variants_for_cpu(
aarch64a
SUFFIX soft_nofp
COMPILE_FLAGS "-march=armv8-a+nofp+nosimd -mabi=aapcs-soft"
MULTILIB_FLAGS "--target=aarch64-unknown-none-elf -march=armv8-a+nofp+nosimd -mabi=aapcs-soft"
PICOLIBC_BUILD_TYPE "release"
QEMU_MACHINE "virt"
QEMU_CPU "cortex-a57"
BOOT_FLASH_ADDRESS 0x40000000
BOOT_FLASH_SIZE 0x1000
FLASH_ADDRESS 0x40001000
FLASH_SIZE 0xfff000
RAM_ADDRESS 0x41000000
RAM_SIZE 0x1000000
STACK_SIZE 8K
)
# For AArch32, clang uses different defaults for FPU selection than GCC, both
# when "+fp" or "+fp.dp" are used and when no FPU specifier is provided in
# "-march=". Using "-mfpu=" explicitly.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
From dd64908ad215a4f4cc79e3eb507f15b27b04e89f Mon Sep 17 00:00:00 2001
From: Keith Packard <[email protected]>
Date: Fri, 4 Oct 2024 21:06:37 -0700
Subject: [libunwind] Support aarch64 without FPU

Skip save/restore of FPU registers on targets without them.

Signed-off-by: Keith Packard <[email protected]>
---
libunwind/src/UnwindRegistersRestore.S | 4 ++--
libunwind/src/UnwindRegistersSave.S | 2 ++
2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S
index 180a66582f..1702d016c3 100644
--- a/libunwind/src/UnwindRegistersRestore.S
+++ b/libunwind/src/UnwindRegistersRestore.S
@@ -658,7 +658,7 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto)
ldp x26,x27, [x0, #0x0D0]
ldp x28,x29, [x0, #0x0E0]
ldr x30, [x0, #0x100] // restore pc into lr
-
+#if defined(__ARM_FP) && __ARM_FP != 0
ldp d0, d1, [x0, #0x110]
ldp d2, d3, [x0, #0x120]
ldp d4, d5, [x0, #0x130]
@@ -676,7 +676,7 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto)
ldp d28,d29, [x0, #0x1F0]
ldr d30, [x0, #0x200]
ldr d31, [x0, #0x208]
-
+#endif
// Finally, restore sp. This must be done after the last read from the
// context struct, because it is allocated on the stack, and an exception
// could clobber the de-allocated portion of the stack after sp has been
diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S
index fab234fcd6..a489a8ba6d 100644
--- a/libunwind/src/UnwindRegistersSave.S
+++ b/libunwind/src/UnwindRegistersSave.S
@@ -746,6 +746,7 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
str x1, [x0, #0x0F8]
str x30, [x0, #0x100] // store return address as pc
// skip cpsr
+#if defined(__ARM_FP) && __ARM_FP != 0
stp d0, d1, [x0, #0x110]
stp d2, d3, [x0, #0x120]
stp d4, d5, [x0, #0x130]
@@ -763,6 +764,7 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
stp d28,d29, [x0, #0x1F0]
str d30, [x0, #0x200]
str d31, [x0, #0x208]
+#endif
mov x0, #0 // return UNW_ESUCCESS
ret

--
2.45.2

Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
From 04146b444eb70c449a958b246ae4f4e9b52bf4bc Mon Sep 17 00:00:00 2001
From: Keith Packard <[email protected]>
Date: Fri, 4 Oct 2024 21:08:17 -0700
Subject: [compiler-rt] Support aarch64 targets without FPU

Fall back to the old C implementations of various routines when
the target doesn't have an FPU.

Signed-off-by: Keith Packard <[email protected]>
---
.../builtins/aarch64/sme-libc-mem-routines.S | 2 +-
.../lib/builtins/aarch64/sme-libc-routines.c | 77 +++++++++++++++++++
2 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
index 0318d9a6f1..72d87fb4fa 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
@@ -6,7 +6,7 @@

#include "../assembly.h"

-#ifdef __aarch64__
+#if defined(__aarch64__) && __ARM_FP != 0

#define L(l) .L ## l

diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-routines.c b/compiler-rt/lib/builtins/aarch64/sme-libc-routines.c
index 315490e73e..92fb953c03 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-libc-routines.c
+++ b/compiler-rt/lib/builtins/aarch64/sme-libc-routines.c
@@ -1,5 +1,82 @@
#include <stddef.h>

+#if __ARM_FP == 0
+// WARNING: When building the scalar versions of these functions you need to
+// use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang
+// from recognising a loop idiom and planting calls to memcpy!
+
+static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
+ size_t n) __arm_streaming_compatible {
+ unsigned char *destp = (unsigned char *)dest;
+ const unsigned char *srcp = (const unsigned char *)src;
+ for (size_t i = 0; i < n; ++i)
+ destp[i] = srcp[i];
+
+ return dest;
+}
+
+// If dest and src overlap then behaviour is undefined, hence we can add the
+// restrict keywords here. This also matches the definition of the libc memcpy
+// according to the man page.
+void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src,
+ size_t n) __arm_streaming_compatible {
+ return __arm_sc_memcpy_fwd(dest, src, n);
+}
+
+void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
+ unsigned char *destp = (unsigned char *)dest;
+ unsigned char c8 = (unsigned char)c;
+ for (size_t i = 0; i < n; ++i)
+ destp[i] = c8;
+
+ return dest;
+}
+
+static void *__arm_sc_memcpy_rev(void *dest, const void *src,
+ size_t n) __arm_streaming_compatible {
+ unsigned char *destp = (unsigned char *)dest;
+ const unsigned char *srcp = (const unsigned char *)src;
+ // TODO: Improve performance by copying larger chunks in reverse, or by
+ // using SVE.
+ while (n > 0) {
+ --n;
+ destp[n] = srcp[n];
+ }
+ return dest;
+}
+
+// Semantically a memmove is equivalent to the following:
+// 1. Copy the entire contents of src to a temporary array that does not
+// overlap with src or dest.
+// 2. Copy the contents of the temporary array into dest.
+void *__arm_sc_memmove(void *dest, const void *src,
+ size_t n) __arm_streaming_compatible {
+ unsigned char *destp = (unsigned char *)dest;
+ const unsigned char *srcp = (const unsigned char *)src;
+
+ // If src and dest don't overlap then just invoke memcpy
+ if ((srcp > (destp + n)) || (destp > (srcp + n)))
+ return __arm_sc_memcpy_fwd(dest, src, n);
+
+ // Overlap case 1:
+ // src: Low | -> | High
+ // dest: Low | -> | High
+ // Here src is always ahead of dest at a higher addres. If we first read a
+ // chunk of data from src we can safely write the same chunk to dest without
+ // corrupting future reads of src.
+ if (srcp > destp)
+ return __arm_sc_memcpy_fwd(dest, src, n);
+
+ // Overlap case 2:
+ // src: Low | -> | High
+ // dest: Low | -> | High
+ // While we're in the overlap region we're always corrupting future reads of
+ // src when writing to dest. An efficient way to do this is to copy the data
+ // in reverse by starting at the highest address.
+ return __arm_sc_memcpy_rev(dest, src, n);
+}
+#endif
+
const void *__arm_sc_memchr(const void *src, int c,
size_t n) __arm_streaming_compatible {
const unsigned char *srcp = (const unsigned char *)src;
--
2.45.2