|
| 1 | +From 04146b444eb70c449a958b246ae4f4e9b52bf4bc Mon Sep 17 00:00:00 2001 |
| 2 | +From: Keith Packard <[email protected]>
| 3 | +Date: Fri, 4 Oct 2024 21:08:17 -0700 |
| 4 | +Subject: [compiler-rt] Support aarch64 targets without FPU |
| 5 | + |
| 6 | +Fall back to the old C implementations of various routines when |
| 7 | +the target doesn't have an FPU. |
| 8 | + |
| 9 | +Signed-off-by: Keith Packard <[email protected]>
| 10 | +--- |
| 11 | + .../builtins/aarch64/sme-libc-mem-routines.S | 2 +- |
| 12 | + .../lib/builtins/aarch64/sme-libc-routines.c | 77 +++++++++++++++++++ |
| 13 | + 2 files changed, 78 insertions(+), 1 deletion(-) |
| 14 | + |
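For context: __ARM_FP is the ACLE feature macro describing hardware floating-point support. Compilers define it as a non-zero bitmask of the supported FP data sizes when an FPU is present and leave it undefined otherwise, and an undefined identifier evaluates to 0 inside an #if expression. A minimal sketch of the guard pattern the patch relies on (illustrative only, not part of the patch):

    /* With an FPU, __ARM_FP is a non-zero bitmask (e.g. 0x0E for
     * half/single/double precision); without one it is undefined and
     * therefore evaluates to 0 in the #if below. */
    #if defined(__aarch64__) && __ARM_FP != 0
    /* FP/SIMD registers are available: keep the optimized assembly routines. */
    #else
    /* No FPU: fall back to the scalar C implementations added below. */
    #endif
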
| 15 | +diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S |
| 16 | +index 0318d9a6f1..72d87fb4fa 100644 |
| 17 | +--- a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S |
| 18 | ++++ b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S |
| 19 | +@@ -6,7 +6,7 @@ |
| 20 | + |
| 21 | + #include "../assembly.h" |
| 22 | + |
| 23 | +-#ifdef __aarch64__ |
| 24 | ++#if defined(__aarch64__) && __ARM_FP != 0 |
| 25 | + |
| 26 | + #define L(l) .L ## l |
| 27 | + |
| 28 | +diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-routines.c b/compiler-rt/lib/builtins/aarch64/sme-libc-routines.c |
| 29 | +index 315490e73e..92fb953c03 100644 |
| 30 | +--- a/compiler-rt/lib/builtins/aarch64/sme-libc-routines.c |
| 31 | ++++ b/compiler-rt/lib/builtins/aarch64/sme-libc-routines.c |
| 32 | +@@ -1,5 +1,82 @@ |
| 33 | + #include <stddef.h> |
| 34 | + |
| 35 | ++#if __ARM_FP == 0 |
| 36 | ++// WARNING: When building the scalar versions of these functions you need to |
| 37 | ++// use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang |
| 38 | ++// from recognising a loop idiom and planting calls to memcpy! |
| 39 | ++ |
| 40 | ++static void *__arm_sc_memcpy_fwd(void *dest, const void *src, |
| 41 | ++ size_t n) __arm_streaming_compatible { |
| 42 | ++ unsigned char *destp = (unsigned char *)dest; |
| 43 | ++ const unsigned char *srcp = (const unsigned char *)src; |
| 44 | ++ for (size_t i = 0; i < n; ++i) |
| 45 | ++ destp[i] = srcp[i]; |
| 46 | ++ |
| 47 | ++ return dest; |
| 48 | ++} |
| 49 | ++ |
| 50 | ++// If dest and src overlap then behaviour is undefined, hence we can add the |
| 51 | ++// restrict keywords here. This also matches the definition of the libc memcpy |
| 52 | ++// according to the man page. |
| 53 | ++void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src, |
| 54 | ++ size_t n) __arm_streaming_compatible { |
| 55 | ++ return __arm_sc_memcpy_fwd(dest, src, n); |
| 56 | ++} |
| 57 | ++ |
| 58 | ++void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible { |
| 59 | ++ unsigned char *destp = (unsigned char *)dest; |
| 60 | ++ unsigned char c8 = (unsigned char)c; |
| 61 | ++ for (size_t i = 0; i < n; ++i) |
| 62 | ++ destp[i] = c8; |
| 63 | ++ |
| 64 | ++ return dest; |
| 65 | ++} |
| 66 | ++ |
| 67 | ++static void *__arm_sc_memcpy_rev(void *dest, const void *src, |
| 68 | ++ size_t n) __arm_streaming_compatible { |
| 69 | ++ unsigned char *destp = (unsigned char *)dest; |
| 70 | ++ const unsigned char *srcp = (const unsigned char *)src; |
| 71 | ++ // TODO: Improve performance by copying larger chunks in reverse, or by |
| 72 | ++ // using SVE. |
| 73 | ++ while (n > 0) { |
| 74 | ++ --n; |
| 75 | ++ destp[n] = srcp[n]; |
| 76 | ++ } |
| 77 | ++ return dest; |
| 78 | ++} |
| 79 | ++ |
| 80 | ++// Semantically a memmove is equivalent to the following: |
| 81 | ++// 1. Copy the entire contents of src to a temporary array that does not |
| 82 | ++// overlap with src or dest. |
| 83 | ++// 2. Copy the contents of the temporary array into dest. |
| 84 | ++void *__arm_sc_memmove(void *dest, const void *src, |
| 85 | ++ size_t n) __arm_streaming_compatible { |
| 86 | ++ unsigned char *destp = (unsigned char *)dest; |
| 87 | ++ const unsigned char *srcp = (const unsigned char *)src; |
| 88 | ++ |
| 89 | ++ // If src and dest don't overlap then just invoke memcpy |
| 90 | ++ if ((srcp > (destp + n)) || (destp > (srcp + n))) |
| 91 | ++ return __arm_sc_memcpy_fwd(dest, src, n); |
| 92 | ++ |
| 93 | ++ // Overlap case 1: |
| 94 | ++ // src: Low | -> | High |
| 95 | ++ // dest: Low | -> | High |
| 96 | ++// Here src is always ahead of dest at a higher address. If we first read a
| 97 | ++ // chunk of data from src we can safely write the same chunk to dest without |
| 98 | ++ // corrupting future reads of src. |
| 99 | ++ if (srcp > destp) |
| 100 | ++ return __arm_sc_memcpy_fwd(dest, src, n); |
| 101 | ++ |
| 102 | ++ // Overlap case 2: |
| 103 | ++ // src: Low | -> | High |
| 104 | ++ // dest: Low | -> | High |
| 105 | ++ // While we're in the overlap region we're always corrupting future reads of |
| 106 | ++// src when writing to dest. An efficient way to handle this is to copy the data
| 107 | ++ // in reverse by starting at the highest address. |
| 108 | ++ return __arm_sc_memcpy_rev(dest, src, n); |
| 109 | ++} |
| 110 | ++#endif |
| 111 | ++ |
| 112 | + const void *__arm_sc_memchr(const void *src, int c, |
| 113 | + size_t n) __arm_streaming_compatible { |
| 114 | + const unsigned char *srcp = (const unsigned char *)src; |
| 115 | +-- |
| 116 | +2.45.2 |
| 117 | + |
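As a rough sanity check for the new scalar fallbacks, the overlap handling described in the __arm_sc_memmove comments above can be compared against the libc memmove on a host build. The sketch below is illustrative only: it assumes the fallback routines are compiled into the program under test, and it declares __arm_sc_memmove with a plain hypothetical prototype (the real declaration in compiler-rt carries the __arm_streaming_compatible attribute).

    #include <assert.h>
    #include <stdio.h>
    #include <string.h>

    /* Hypothetical plain prototype for the fallback under test. */
    void *__arm_sc_memmove(void *dest, const void *src, size_t n);

    int main(void) {
      char a[16], b[16];

      /* Overlap case 1: dest is below src, so a forward copy is safe. */
      for (int i = 0; i < 16; ++i)
        a[i] = b[i] = (char)i;
      __arm_sc_memmove(a, a + 4, 8);
      memmove(b, b + 4, 8);
      assert(memcmp(a, b, sizeof a) == 0);

      /* Overlap case 2: dest is above src, so the copy must run in reverse. */
      for (int i = 0; i < 16; ++i)
        a[i] = b[i] = (char)i;
      __arm_sc_memmove(a + 4, a, 8);
      memmove(b + 4, b, 8);
      assert(memcmp(a, b, sizeof a) == 0);

      puts("ok");
      return 0;
    }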