Skip to content

Commit 766d7bd

Browse files
committed
Add initial support for AMO load builtins
This commit adds two Clang builtins for PowerPC atomic memory operation (AMO) loads: __builtin_amo_lwat for 32-bit unsigned operations and __builtin_amo_ldat for 64-bit unsigned operations. It also adds an amo.h header that maps GCC's AMO functions to these Clang builtins for compatibility.
1 parent 2d8563f commit 766d7bd

File tree

14 files changed

+434
-10
lines changed

14 files changed

+434
-10
lines changed

clang/include/clang/Basic/BuiltinsPPC.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1001,6 +1001,10 @@ TARGET_BUILTIN(__builtin_darn_32, "i", "", "isa-v30-instructions")
10011001
TARGET_BUILTIN(__builtin_unpack_vector_int128, "ULLiV1LLLii", "", "vsx")
10021002
TARGET_BUILTIN(__builtin_pack_vector_int128, "V1LLLiULLiULLi", "", "vsx")
10031003

1004+
// AMO builtins
1005+
TARGET_BUILTIN(__builtin_amo_lwat, "UiUi*UiIi", "", "isa-v30-instructions")
1006+
TARGET_BUILTIN(__builtin_amo_ldat, "ULiULi*ULiIi", "", "isa-v30-instructions")
1007+
10041008
// Set the floating point rounding mode
10051009
BUILTIN(__builtin_setrnd, "di", "")
10061010

clang/lib/Headers/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ set(opencl_files
119119

120120
set(ppc_files
121121
altivec.h
122+
amo.h
122123
)
123124

124125
set(ppc_htm_files

clang/lib/Headers/amo.h

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/*===---- amo.h - PowerPC Atomic Memory Operations ------------------------===*\
2+
*
3+
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
* See https://llvm.org/LICENSE.txt for license information.
5+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
*
7+
\*===----------------------------------------------------------------------===*/
8+
9+
/* This header provides compatibility wrappers for GCC's AMO functions.
10+
* The functions here call Clang's underlying AMO builtins.
11+
*/
12+
13+
#ifndef _AMO_H
14+
#define _AMO_H
15+
16+
#include <stdint.h>
17+
18+
#ifdef __cplusplus
19+
extern "C" {
20+
#endif
21+
22+
/* AMO Load Operation Codes (FC values) */
23+
/* Function codes supplied as the constant third argument of
 * __builtin_amo_lwat / __builtin_amo_ldat by the wrappers in this header.
 * _AMO_LD_SMAX and _AMO_LD_SMIN are defined here, but no wrapper in this
 * header uses them. */
enum {
24+
_AMO_LD_ADD = 0x00, /* Fetch and Add */
25+
_AMO_LD_XOR = 0x01, /* Fetch and XOR */
26+
_AMO_LD_IOR = 0x02, /* Fetch and OR */
27+
_AMO_LD_AND = 0x03, /* Fetch and AND */
28+
_AMO_LD_UMAX = 0x04, /* Fetch and Maximum Unsigned */
29+
_AMO_LD_SMAX = 0x05, /* Fetch and Maximum Signed */
30+
_AMO_LD_UMIN = 0x06, /* Fetch and Minimum Unsigned */
31+
_AMO_LD_SMIN = 0x07, /* Fetch and Minimum Signed */
32+
_AMO_LD_SWAP = 0x08 /* Swap */
33+
};
34+
35+
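/* 32-bit unsigned AMO load operations: each wrapper passes ptr and val to __builtin_amo_lwat with the matching _AMO_LD_* function code and returns the builtin's result. */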
36+
static inline uint32_t amo_lwat_add(uint32_t *ptr, uint32_t val) {
37+
return __builtin_amo_lwat(ptr, val, _AMO_LD_ADD);
38+
}
39+
40+
static inline uint32_t amo_lwat_xor(uint32_t *ptr, uint32_t val) {
41+
return __builtin_amo_lwat(ptr, val, _AMO_LD_XOR);
42+
}
43+
44+
static inline uint32_t amo_lwat_ior(uint32_t *ptr, uint32_t val) {
45+
return __builtin_amo_lwat(ptr, val, _AMO_LD_IOR);
46+
}
47+
48+
static inline uint32_t amo_lwat_and(uint32_t *ptr, uint32_t val) {
49+
return __builtin_amo_lwat(ptr, val, _AMO_LD_AND);
50+
}
51+
52+
static inline uint32_t amo_lwat_umax(uint32_t *ptr, uint32_t val) {
53+
return __builtin_amo_lwat(ptr, val, _AMO_LD_UMAX);
54+
}
55+
56+
static inline uint32_t amo_lwat_umin(uint32_t *ptr, uint32_t val) {
57+
return __builtin_amo_lwat(ptr, val, _AMO_LD_UMIN);
58+
}
59+
60+
static inline uint32_t amo_lwat_swap(uint32_t *ptr, uint32_t val) {
61+
return __builtin_amo_lwat(ptr, val, _AMO_LD_SWAP);
62+
}
63+
64+
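/* 64-bit unsigned AMO load operations: each wrapper passes ptr and val to __builtin_amo_ldat with the matching _AMO_LD_* function code and returns the builtin's result. */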
65+
static inline uint64_t amo_ldat_add(uint64_t *ptr, uint64_t val) {
66+
return __builtin_amo_ldat(ptr, val, _AMO_LD_ADD);
67+
}
68+
69+
static inline uint64_t amo_ldat_xor(uint64_t *ptr, uint64_t val) {
70+
return __builtin_amo_ldat(ptr, val, _AMO_LD_XOR);
71+
}
72+
73+
static inline uint64_t amo_ldat_ior(uint64_t *ptr, uint64_t val) {
74+
return __builtin_amo_ldat(ptr, val, _AMO_LD_IOR);
75+
}
76+
77+
static inline uint64_t amo_ldat_and(uint64_t *ptr, uint64_t val) {
78+
return __builtin_amo_ldat(ptr, val, _AMO_LD_AND);
79+
}
80+
81+
static inline uint64_t amo_ldat_umax(uint64_t *ptr, uint64_t val) {
82+
return __builtin_amo_ldat(ptr, val, _AMO_LD_UMAX);
83+
}
84+
85+
static inline uint64_t amo_ldat_umin(uint64_t *ptr, uint64_t val) {
86+
return __builtin_amo_ldat(ptr, val, _AMO_LD_UMIN);
87+
}
88+
89+
static inline uint64_t amo_ldat_swap(uint64_t *ptr, uint64_t val) {
90+
return __builtin_amo_ldat(ptr, val, _AMO_LD_SWAP);
91+
}
92+
93+
#ifdef __cplusplus
94+
}
95+
#endif
96+
97+
#endif /* _AMO_H */

clang/lib/Sema/SemaPPC.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ static bool isPPC_64Builtin(unsigned BuiltinID) {
8787
case PPC::BI__builtin_ppc_fetch_and_andlp:
8888
case PPC::BI__builtin_ppc_fetch_and_orlp:
8989
case PPC::BI__builtin_ppc_fetch_and_swaplp:
90+
case PPC::BI__builtin_amo_lwat:
91+
case PPC::BI__builtin_amo_ldat:
9092
return true;
9193
}
9294
return false;
@@ -253,6 +255,19 @@ bool SemaPPC::CheckPPCBuiltinFunctionCall(const TargetInfo &TI,
253255
case PPC::BI__builtin_##Name: \
254256
return BuiltinPPCMMACall(TheCall, BuiltinID, Types);
255257
#include "clang/Basic/BuiltinsPPC.def"
258+
// AMO load builtins: validate argument index 2 (the function code the
// wrappers in amo.h pass as an _AMO_LD_* value).
case PPC::BI__builtin_amo_lwat:
259+
case PPC::BI__builtin_amo_ldat: {
260+
llvm::APSInt Result;
261+
// Evaluate argument 2 into Result; a true return is propagated as-is.
// NOTE(review): presumably BuiltinConstantArg has already diagnosed a
// non-constant argument when it returns true -- confirm.
if (SemaRef.BuiltinConstantArg(TheCall, 2, Result))
262+
return true;
263+
unsigned Val = Result.getZExtValue();
264+
// Accepted function codes: 0-8, 16, 24-25 and 28. Returning false leaves
// the call undiagnosed.
if ((Val <= 8) || Val == 16 || (Val >= 24 && Val <= 25) || Val == 28)
265+
return false;
266+
Expr *Arg = TheCall->getArg(2);
267+
// The two string operands are streamed one after the other so the rendered
// message lists every accepted value: "[0-8, 16, 24-25, 28]".
return SemaRef.Diag(Arg->getBeginLoc(), diag::err_argument_invalid_range)
268+
<< toString(Result, 10) << "0-8, 16, 24-25" << "28"
269+
<< Arg->getSourceRange();
270+
}
256271
}
257272
llvm_unreachable("must return from switch");
258273
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// RUN: not %clang_cc1 -triple powerpc-ibm-aix -target-cpu pwr9 \
2+
// RUN: -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=AIX32-ERROR
3+
// RUN: not %clang_cc1 -triple powerpc64-ibm-aix -target-cpu pwr9 \
4+
// RUN: -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=FC-ERROR
5+
6+
void test_amo() {
7+
unsigned int *ptr1, value1;
8+
// AIX32-ERROR: error: this builtin is only available on 64-bit targets
9+
__builtin_amo_lwat(ptr1, value1, 0);
10+
// FC-ERROR: argument value 9 is outside the valid range [0-8, 16, 24-25, 28]
11+
__builtin_amo_lwat(ptr1, value1, 9);
12+
13+
unsigned long int *ptr2, value2;
14+
// AIX32-ERROR: error: this builtin is only available on 64-bit targets
15+
__builtin_amo_ldat(ptr2, value2, 3);
16+
// FC-ERROR: error: argument value 26 is outside the valid range [0-8, 16, 24-25, 28]
17+
__builtin_amo_ldat(ptr2, value2, 26);
18+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
2+
// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu pwr9 \
3+
// RUN: -emit-llvm %s -o - | FileCheck %s
4+
// RUN: %clang_cc1 -O3 -triple powerpc64-ibm-aix -target-cpu pwr9 \
5+
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=AIX
6+
7+
// CHECK-LABEL: define dso_local void @test_unsigned_lwat(
8+
// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef zeroext [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
9+
// CHECK-NEXT: [[ENTRY:.*:]]
10+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ppc.amo.lwat(ptr [[PTR]], i32 [[VALUE]], i32 0)
11+
// CHECK-NEXT: store i32 [[TMP0]], ptr [[RESP]], align 4, !tbaa [[INT_TBAA2:![0-9]+]]
12+
// CHECK-NEXT: ret void
13+
//
14+
// AIX-LABEL: define void @test_unsigned_lwat(
15+
// AIX-SAME: ptr noundef [[PTR:%.*]], i32 noundef zeroext [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
16+
// AIX-NEXT: [[ENTRY:.*:]]
17+
// AIX-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ppc.amo.lwat(ptr [[PTR]], i32 [[VALUE]], i32 0)
18+
// AIX-NEXT: store i32 [[TMP0]], ptr [[RESP]], align 4, !tbaa [[INT_TBAA2:![0-9]+]]
19+
// AIX-NEXT: ret void
20+
//
21+
void test_unsigned_lwat(unsigned int *ptr, unsigned int value, unsigned int * resp) {
22+
unsigned int res = __builtin_amo_lwat(ptr, value, 0);
23+
*resp = res;
24+
}
25+
26+
// CHECK-LABEL: define dso_local void @test_unsigned_ldat(
27+
// CHECK-SAME: ptr noundef [[PTR:%.*]], i64 noundef [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
28+
// CHECK-NEXT: [[ENTRY:.*:]]
29+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ppc.amo.ldat(ptr [[PTR]], i64 [[VALUE]], i32 3)
30+
// CHECK-NEXT: store i64 [[TMP0]], ptr [[RESP]], align 8, !tbaa [[LONG_TBAA6:![0-9]+]]
31+
// CHECK-NEXT: ret void
32+
//
33+
// AIX-LABEL: define void @test_unsigned_ldat(
34+
// AIX-SAME: ptr noundef [[PTR:%.*]], i64 noundef [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
35+
// AIX-NEXT: [[ENTRY:.*:]]
36+
// AIX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ppc.amo.ldat(ptr [[PTR]], i64 [[VALUE]], i32 3)
37+
// AIX-NEXT: store i64 [[TMP0]], ptr [[RESP]], align 8, !tbaa [[LONG_TBAA6:![0-9]+]]
38+
// AIX-NEXT: ret void
39+
//
40+
void test_unsigned_ldat(unsigned long int *ptr, unsigned long int value, unsigned long int * resp) {
41+
unsigned long int res = __builtin_amo_ldat(ptr, value, 3);
42+
*resp = res;
43+
}
44+
//.
45+
// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
46+
// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
47+
// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
48+
// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
49+
// CHECK: [[LONG_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
50+
// CHECK: [[META7]] = !{!"long", [[META4]], i64 0}
51+
//.
52+
// AIX: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
53+
// AIX: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
54+
// AIX: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
55+
// AIX: [[META5]] = !{!"Simple C/C++ TBAA"}
56+
// AIX: [[LONG_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
57+
// AIX: [[META7]] = !{!"long", [[META4]], i64 0}
58+
//.
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
// REQUIRES: powerpc-registered-target
2+
// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -target-cpu pwr9 \
3+
// RUN: -emit-llvm %s -o - | FileCheck %s
4+
// RUN: %clang_cc1 -triple powerpc64-ibm-aix -target-cpu pwr9 \
5+
// RUN: -emit-llvm %s -o - | FileCheck %s
6+
7+
#include <amo.h>
8+
9+
uint32_t test_lwat_add(uint32_t *ptr, uint32_t val) {
10+
// CHECK-LABEL: @test_lwat_add
11+
// CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 0)
12+
return amo_lwat_add(ptr, val);
13+
}
14+
15+
uint32_t test_lwat_xor(uint32_t *ptr, uint32_t val) {
16+
// CHECK-LABEL: @test_lwat_xor
17+
// CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 1)
18+
return amo_lwat_xor(ptr, val);
19+
}
20+
21+
uint32_t test_lwat_ior(uint32_t *ptr, uint32_t val) {
22+
// CHECK-LABEL: @test_lwat_ior
23+
// CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 2)
24+
return amo_lwat_ior(ptr, val);
25+
}
26+
27+
uint32_t test_lwat_and(uint32_t *ptr, uint32_t val) {
28+
// CHECK-LABEL: @test_lwat_and
29+
// CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 3)
30+
return amo_lwat_and(ptr, val);
31+
}
32+
33+
uint32_t test_lwat_umax(uint32_t *ptr, uint32_t val) {
34+
// CHECK-LABEL: @test_lwat_umax
35+
// CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 4)
36+
return amo_lwat_umax(ptr, val);
37+
}
38+
39+
uint32_t test_lwat_umin(uint32_t *ptr, uint32_t val) {
40+
// CHECK-LABEL: @test_lwat_umin
41+
// CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 6)
42+
return amo_lwat_umin(ptr, val);
43+
}
44+
45+
uint32_t test_lwat_swap(uint32_t *ptr, uint32_t val) {
46+
// CHECK-LABEL: @test_lwat_swap
47+
// CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 8)
48+
return amo_lwat_swap(ptr, val);
49+
}
50+
51+
uint64_t test_ldat_add(uint64_t *ptr, uint64_t val) {
52+
// CHECK-LABEL: @test_ldat_add
53+
// CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 0)
54+
return amo_ldat_add(ptr, val);
55+
}
56+
57+
uint64_t test_ldat_xor(uint64_t *ptr, uint64_t val) {
58+
// CHECK-LABEL: @test_ldat_xor
59+
// CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 1)
60+
return amo_ldat_xor(ptr, val);
61+
}
62+
63+
uint64_t test_ldat_ior(uint64_t *ptr, uint64_t val) {
64+
// CHECK-LABEL: @test_ldat_ior
65+
// CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 2)
66+
return amo_ldat_ior(ptr, val);
67+
}
68+
69+
uint64_t test_ldat_and(uint64_t *ptr, uint64_t val) {
70+
// CHECK-LABEL: @test_ldat_and
71+
// CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 3)
72+
return amo_ldat_and(ptr, val);
73+
}
74+
75+
uint64_t test_ldat_umax(uint64_t *ptr, uint64_t val) {
76+
// CHECK-LABEL: @test_ldat_umax
77+
// CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 4)
78+
return amo_ldat_umax(ptr, val);
79+
}
80+
81+
uint64_t test_ldat_umin(uint64_t *ptr, uint64_t val) {
82+
// CHECK-LABEL: @test_ldat_umin
83+
// CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 6)
84+
return amo_ldat_umin(ptr, val);
85+
}
86+
87+
uint64_t test_ldat_swap(uint64_t *ptr, uint64_t val) {
88+
// CHECK-LABEL: @test_ldat_swap
89+
// CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 8)
90+
return amo_ldat_swap(ptr, val);
91+
}

llvm/include/llvm/IR/IntrinsicsPowerPC.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2132,3 +2132,15 @@ let TargetPrefix = "ppc" in {
21322132
Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty],
21332133
[IntrArgMemOnly, IntrWriteMem, NoCapture<ArgIndex<2>>]>;
21342134
}
2135+
2136+
// AMO intrinsics
2137+
// Intrinsics backing the __builtin_amo_lwat / __builtin_amo_ldat Clang
// builtins, declared under the "ppc" target prefix.
let TargetPrefix = "ppc" in {
2138+
// 32-bit AMO load: operands are (ptr, i32, i32) and the result is i32.
// Operand 2 is marked ImmArg, so it must be an immediate.
def int_ppc_amo_lwat : ClangBuiltin<"__builtin_amo_lwat">,
2139+
DefaultAttrsIntrinsic<[llvm_i32_ty],[llvm_ptr_ty,
2140+
llvm_i32_ty, llvm_i32_ty],
2141+
[IntrArgMemOnly, ImmArg<ArgIndex<2>>]>;
2142+
// 64-bit AMO load: operands are (ptr, i64, i32) and the result is i64.
// Operand 2 is marked ImmArg, so it must be an immediate.
def int_ppc_amo_ldat : ClangBuiltin<"__builtin_amo_ldat">,
2143+
DefaultAttrsIntrinsic<[llvm_i64_ty],[llvm_ptr_ty,
2144+
llvm_i64_ty, llvm_i32_ty],
2145+
[IntrArgMemOnly, ImmArg<ArgIndex<2>>]>;
2146+
}

0 commit comments

Comments
 (0)