Commit 87283db

[clang][ARM] Fix build failure in <arm_acle.h> for __swp (#151354)
In commit d598590 I introduced a Sema check that prohibits `__builtin_arm_ldrex` and `__builtin_arm_strex` for data sizes not supported by the target architecture version. However, `arm_acle.h` unconditionally uses those builtins with a 32-bit data size, so including that header now causes a build failure on Armv6-M, or on historic architectures like Armv5.

To fix it, `arm_acle.h` now queries the compiler-defined `__ARM_FEATURE_LDREX` macro (also fixed recently, in commit 34f59d7, so that it matches the target architecture). If 32-bit LDREX isn't available, the header falls back to the older SWP instruction, or failing that (on Armv6-M), to a libcall.

While I was modifying the header anyway, I also renamed the local variable `v` inside `__swp` so that it starts with `__`, avoiding any risk of a collision with user code that has #defined `v`.
1 parent c5e6938 commit 87283db
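For context: ACLE defines `__ARM_FEATURE_LDREX` as a bitmask of the exclusive-access sizes the target supports (0x1 = byte, 0x2 = halfword, 0x4 = word, 0x8 = doubleword), so `& 4` tests specifically for 32-bit LDREX/STREX. Below is a minimal standalone sketch of the dispatch the header now performs; `exchange_word` is a hypothetical name used only for illustration, not part of the commit:

#include <stdint.h>

/* Sketch only: mirrors the header's feature test, assuming that
 * __ARM_FEATURE_LDREX is left undefined when no exclusives exist. */
static inline uint32_t exchange_word(volatile uint32_t *p, uint32_t x) {
#if defined(__ARM_FEATURE_LDREX) && (__ARM_FEATURE_LDREX & 4)
  /* 32-bit exclusives available: classic LDREX/STREX retry loop */
  uint32_t old;
  do
    old = __builtin_arm_ldrex(p);
  while (__builtin_arm_strex(x, p));
  return old;
#else
  /* no 32-bit LDREX: let the compiler emit a libcall or other sequence */
  return __atomic_exchange_n(p, x, __ATOMIC_RELAXED);
#endif
}

On armv7a the macro evaluates to 0xF and the LDREX path is chosen; on Armv6-M it is undefined and the fallback path is used.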

File tree: 3 files changed (+52, -28 lines)
clang/lib/Headers/arm_acle.h

Lines changed: 31 additions & 5 deletions
@@ -55,11 +55,37 @@ __chkfeat(uint64_t __features) {
 /* 7.5 Swap */
 static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
 __swp(uint32_t __x, volatile uint32_t *__p) {
-  uint32_t v;
-  do
-    v = __builtin_arm_ldrex(__p);
-  while (__builtin_arm_strex(__x, __p));
-  return v;
+  uint32_t __v;
+#if (__ARM_FEATURE_LDREX & 4) || __ARM_ARCH_6M__ || __linux__
+  /*
+   * Using this clang builtin is sensible in most situations. Where
+   * LDREX and STREX are available, it will compile to a loop using
+   * them. Otherwise it will compile to a libcall, requiring the
+   * runtime to provide that library function.
+   *
+   * That's unavoidable on Armv6-M, which has no atomic instructions
+   * at all (not even SWP), so in that situation the user will just
+   * have to provide an implementation of __atomic_exchange_4 (perhaps
+   * it would temporarily disable interrupts, and then do a separate
+   * load and store).
+   *
+   * We also use the libcall strategy on pre-Armv7 Linux targets, on
+   * the theory that Linux's runtime support library _will_ provide a
+   * suitable libcall, and it's better to use that than the SWP
+   * instruction because then when the same binary is run on a later
+   * Linux system the libcall implementation will use LDREX instead.
+   */
+  __v = __atomic_exchange_n(__p, __x, __ATOMIC_RELAXED);
+#else
+  /*
+   * But for older Arm architectures when the target is not Linux, we
+   * fall back to using the SWP instruction via inline assembler. ACLE
+   * is clear that we're allowed to do this, but shouldn't do it if we
+   * have a better alternative.
+   */
+  __asm__("swp %0, %1, [%2]" : "=r"(__v) : "r"(__x), "r"(__p) : "memory");
+#endif
+  return __v;
 }
 
 /* 7.6 Memory prefetch intrinsics */
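The comment in the hunk above leaves the Armv6-M libcall to the user. As a minimal sketch of what such a user-provided routine might look like on a bare-metal, single-core Armv6-M system, following the interrupt-masking strategy the comment suggests and assuming the usual libatomic-style signature for `__atomic_exchange_4` (illustrative only, not part of this commit):

#include <stdint.h>

/* Hypothetical user-supplied libcall for bare-metal single-core Armv6-M.
 * Mask interrupts, then perform a separate load and store, as the header
 * comment suggests. Assumes no other bus master touches the location. */
uint32_t __atomic_exchange_4(volatile void *ptr, uint32_t val, int memorder) {
  volatile uint32_t *p = (volatile uint32_t *)ptr;
  uint32_t primask, old;
  (void)memorder; /* single core: every ordering reduces to this */
  __asm__ volatile("mrs %0, PRIMASK" : "=r"(primask)); /* save IRQ mask */
  __asm__ volatile("cpsid i" ::: "memory");            /* disable IRQs  */
  old = *p;
  *p = val;
  __asm__ volatile("msr PRIMASK, %0" : : "r"(primask) : "memory"); /* restore */
  return old;
}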

clang/test/CodeGen/arm_acle.c

Lines changed: 4 additions & 23 deletions
@@ -139,29 +139,10 @@ void test_dbg(void) {
 #endif
 
 /* 8.5 Swap */
-// AArch32-LABEL: @test_swp(
-// AArch32-NEXT: entry:
-// AArch32-NEXT: br label [[DO_BODY_I:%.*]]
-// AArch32: do.body.i:
-// AArch32-NEXT: [[LDREX_I:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) [[P:%.*]])
-// AArch32-NEXT: [[STREX_I:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[X:%.*]], ptr elementtype(i32) [[P]])
-// AArch32-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[STREX_I]], 0
-// AArch32-NEXT: br i1 [[TOBOOL_I]], label [[DO_BODY_I]], label [[__SWP_EXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
-// AArch32: __swp.exit:
-// AArch32-NEXT: ret void
-//
-// AArch64-LABEL: @test_swp(
-// AArch64-NEXT: entry:
-// AArch64-NEXT: br label [[DO_BODY_I:%.*]]
-// AArch64: do.body.i:
-// AArch64-NEXT: [[LDXR_I:%.*]] = call i64 @llvm.aarch64.ldxr.p0(ptr elementtype(i32) [[P:%.*]])
-// AArch64-NEXT: [[TMP0:%.*]] = trunc i64 [[LDXR_I]] to i32
-// AArch64-NEXT: [[TMP1:%.*]] = zext i32 [[X:%.*]] to i64
-// AArch64-NEXT: [[STXR_I:%.*]] = call i32 @llvm.aarch64.stxr.p0(i64 [[TMP1]], ptr elementtype(i32) [[P]])
-// AArch64-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[STXR_I]], 0
-// AArch64-NEXT: br i1 [[TOBOOL_I]], label [[DO_BODY_I]], label [[__SWP_EXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
-// AArch64: __swp.exit:
-// AArch64-NEXT: ret void
+// ARM-LABEL: @test_swp(
+// ARM-NEXT: entry:
+// ARM-NEXT: [[TMP0:%.*]] = atomicrmw volatile xchg ptr [[P:%.*]], i32 [[X:%.*]] monotonic, align 4
+// ARM-NEXT: ret void
 //
 void test_swp(uint32_t x, volatile void *p) {
   __swp(x, p);

clang/test/CodeGen/arm_acle_swp.c

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -ffreestanding -triple thumbv7m-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=ATOMIC
+// RUN: %clang_cc1 -ffreestanding -triple armv7a-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=ATOMIC
+// RUN: %clang_cc1 -ffreestanding -triple armv6-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=ATOMIC
+// RUN: %clang_cc1 -ffreestanding -triple thumbv6m-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=ATOMIC
+// RUN: %clang_cc1 -ffreestanding -triple armv5-unknown-linux-gnu -target-abi aapcs -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=ATOMIC
+// RUN: %clang_cc1 -ffreestanding -triple armv5-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=SWP
+
+// REQUIRES: arm-registered-target
+
+#include <arm_acle.h>
+
+// SWP: call i32 asm "swp $0, $1, [$2]", "=r,r,r,~{memory}"(i32 {{.*}}, ptr {{.*}})
+
+// ATOMIC: atomicrmw volatile xchg ptr {{.*}}, i32 {{.*}} monotonic, align 4
+uint32_t test_swp(uint32_t x, volatile void *p) {
+  return __swp(x, p);
+}
