Skip to content

Commit e2bd10c

Browse files
authored
[AMDGPU][gfx1250] Add 128B cooperative atomics (#156418)
- Add clang built-ins + sema/codegen
- Add IR Intrinsic + verifier
- Add DAG/GlobalISel codegen for the intrinsics
- Add lowering in SIMemoryLegalizer using a MMO flag.
1 parent 3ec7b89 commit e2bd10c

24 files changed

+3076
-3
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -835,5 +835,15 @@ TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x128_iu8, "V8iIbV8iIbV16iV8iiIbI
835835
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x64_f16, "V8fIbV16hIbV32hV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
836836
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x64_f16, "V8hIbV16hIbV32hV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
837837

838+
// GFX12.5 128B cooperative atomics.
// Type-string encoding: the pointer operand comes first, stores then take the
// value to store, "Ii" is the required constant-integer atomic ordering, and
// "cC*" is the syncscope string literal (validated in
// SemaAMDGPU::checkCoopAtomicFunctionCall).
TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_32x4B, "ii*IicC*", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_32x4B, "vi*iIicC*", "nc", "gfx1250-insts,wavefrontsize32")

TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_16x8B, "V2iV2i*IicC*", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_16x8B, "vV2i*V2iIicC*", "nc", "gfx1250-insts,wavefrontsize32")

TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_8x16B, "V4iV4i*IicC*", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_8x16B, "vV4i*V4iIicC*", "nc", "gfx1250-insts,wavefrontsize32")
838848
#undef BUILTIN
839849
#undef TARGET_BUILTIN

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13609,4 +13609,6 @@ def warn_acc_var_referenced_lacks_op
1360913609
// AMDGCN builtins diagnostics
1361013610
def err_amdgcn_load_lds_size_invalid_value : Error<"invalid size value">;
1361113611
def note_amdgcn_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">;
13612+
13613+
// Pointer-operand address-space restriction for the gfx1250 128B cooperative
// atomic builtins (only flat/generic and global pointers are accepted).
def err_amdgcn_coop_atomic_invalid_as : Error<"cooperative atomic requires a global or generic pointer">;
1361213614
} // end of sema component.

clang/include/clang/Sema/SemaAMDGPU.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ class SemaAMDGPU : public SemaBase {
2626

2727
bool CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
2828

29+
  /// Validate a call to one of the gfx1250 128B cooperative atomic builtins:
  /// the pointer must be in the global or generic address space, the atomic
  /// ordering immediate must be valid for the operation (no acq_rel; no
  /// acquire on stores, no release on loads), and the trailing syncscope
  /// argument must be a string literal.
  /// \param IsStore selects the stores' argument layout (ordering is arg 2
  ///        instead of arg 1).
  /// \returns true if the call is invalid; diagnostics are already emitted.
  bool checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore);
30+
2931
bool checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
3032
unsigned NumDataArgs);
3133

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,49 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
701701
return emitBuiltinWithOneOverloadedType<5>(*this, E,
702702
Intrinsic::amdgcn_load_to_lds);
703703
}
704+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
705+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
706+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
707+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
708+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
709+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: {
710+
Intrinsic::ID IID;
711+
switch (BuiltinID) {
712+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
713+
IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B;
714+
break;
715+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
716+
IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B;
717+
break;
718+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
719+
IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B;
720+
break;
721+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
722+
IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B;
723+
break;
724+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
725+
IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B;
726+
break;
727+
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
728+
IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B;
729+
break;
730+
}
731+
732+
LLVMContext &Ctx = CGM.getLLVMContext();
733+
SmallVector<Value *, 5> Args;
734+
// last argument is a MD string
735+
const unsigned ScopeArg = E->getNumArgs() - 1;
736+
for (unsigned i = 0; i != ScopeArg; ++i)
737+
Args.push_back(EmitScalarExpr(E->getArg(i)));
738+
StringRef Arg = cast<StringLiteral>(E->getArg(ScopeArg)->IgnoreParenCasts())
739+
->getString();
740+
llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)});
741+
Args.push_back(llvm::MetadataAsValue::get(Ctx, MD));
742+
// Intrinsic is typed based on the pointer AS. Pointer is always the first
743+
// argument.
744+
llvm::Function *F = CGM.getIntrinsic(IID, {Args[0]->getType()});
745+
return Builder.CreateCall(F, {Args});
746+
}
704747
case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
705748
Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
706749
{llvm::Type::getInt64Ty(getLLVMContext())});

clang/lib/Sema/SemaAMDGPU.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "clang/Basic/TargetBuiltins.h"
1616
#include "clang/Sema/Ownership.h"
1717
#include "clang/Sema/Sema.h"
18+
#include "llvm/Support/AMDGPUAddrSpace.h"
1819
#include "llvm/Support/AtomicOrdering.h"
1920
#include <cstdint>
2021

@@ -100,6 +101,14 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
100101
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_fp6:
101102
case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_bf6:
102103
return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 7);
104+
  // GFX12.5 128B cooperative atomics share one checker; loads and stores
  // differ only in which argument carries the atomic-ordering immediate.
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
    return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/false);
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
    return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/true);
103112
default:
104113
return false;
105114
}
@@ -145,6 +154,50 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
145154
return false;
146155
}
147156

157+
/// Validate a gfx1250 128B cooperative atomic builtin call.
/// Emits a diagnostic for every violated constraint (address space, atomic
/// ordering, syncscope literal) rather than stopping at the first one, so a
/// single compile surfaces all problems with the call.
/// \returns true if the call is invalid.
bool SemaAMDGPU::checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore) {
  bool Fail = false;

  // The first argument must point into the flat (generic) or global address
  // space; all other address spaces are rejected.
  Expr *PtrArg = TheCall->getArg(0);
  QualType PtrTy = PtrArg->getType()->getPointeeType();
  unsigned AS = getASTContext().getTargetAddressSpace(PtrTy.getAddressSpace());
  if (AS != llvm::AMDGPUAS::FLAT_ADDRESS &&
      AS != llvm::AMDGPUAS::GLOBAL_ADDRESS) {
    Fail = true;
    Diag(TheCall->getBeginLoc(), diag::err_amdgcn_coop_atomic_invalid_as)
        << PtrArg->getSourceRange();
  }

  // Check the atomic ordering. For stores the ordering follows the stored
  // value (arg 2); for loads it follows the pointer (arg 1). The builtin
  // signature marks it "Ii", so a non-constant argument is rejected before
  // we get here.
  Expr *AtomicOrdArg = TheCall->getArg(IsStore ? 2 : 1);
  Expr::EvalResult AtomicOrdArgRes;
  if (!AtomicOrdArg->EvaluateAsInt(AtomicOrdArgRes, getASTContext()))
    llvm_unreachable("Intrinsic requires imm for atomic ordering argument!");
  auto Ord =
      llvm::AtomicOrderingCABI(AtomicOrdArgRes.Val.getInt().getZExtValue());

  // Atomic ordering cannot be acq_rel in any case, acquire for stores or
  // release for loads. Accumulate into Fail (instead of returning early) so
  // the syncscope check below still runs and reports its own diagnostic.
  if (!llvm::isValidAtomicOrderingCABI((unsigned)Ord) ||
      Ord == llvm::AtomicOrderingCABI::acq_rel ||
      Ord == (IsStore ? llvm::AtomicOrderingCABI::acquire
                      : llvm::AtomicOrderingCABI::release)) {
    Fail = true;
    Diag(AtomicOrdArg->getBeginLoc(),
         diag::warn_atomic_op_has_invalid_memory_order)
        << 0 << AtomicOrdArg->getSourceRange();
  }

  // The last argument (the syncscope) must be a string literal; codegen
  // turns it into a metadata string.
  Expr *Arg = TheCall->getArg(TheCall->getNumArgs() - 1);
  if (!isa<StringLiteral>(Arg->IgnoreParenImpCasts())) {
    Fail = true;
    Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal)
        << Arg->getSourceRange();
  }

  return Fail;
}
200+
148201
bool SemaAMDGPU::checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
149202
unsigned NumDataArgs) {
150203
assert(NumDataArgs <= 2);
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5

// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s

// IR-lowering checks for the gfx1250 128B cooperative atomic builtins: each
// builtin maps to an @llvm.amdgcn.cooperative.atomic.* intrinsic overloaded
// on the pointer address space, with the syncscope forwarded as metadata.
typedef int v2i __attribute__((ext_vector_type(2)));
typedef int v4i __attribute__((ext_vector_type(4)));

// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_32x4B(
// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], i32 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p1(ptr addrspace(1) [[GADDR]], i32 [[VAL]], i32 0, metadata [[META4:![0-9]+]])
// CHECK-NEXT: ret void
//
void test_amdgcn_cooperative_atomic_store_32x4B(global int* gaddr, int val)
{
  __builtin_amdgcn_cooperative_atomic_store_32x4B(gaddr, val, __ATOMIC_RELAXED, "agent");
}

// CHECK-LABEL: define dso_local i32 @test_amdgcn_cooperative_atomic_load_32x4B(
// CHECK-SAME: ptr noundef readonly captures(none) [[ADDR:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr [[ADDR]], i32 0, metadata [[META5:![0-9]+]])
// CHECK-NEXT: ret i32 [[TMP0]]
//
int test_amdgcn_cooperative_atomic_load_32x4B(int* addr)
{
  return __builtin_amdgcn_cooperative_atomic_load_32x4B(addr, __ATOMIC_RELAXED, "");
}

// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_16x8B(
// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], <2 x i32> noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p1(ptr addrspace(1) [[GADDR]], <2 x i32> [[VAL]], i32 0, metadata [[META5]])
// CHECK-NEXT: ret void
//
void test_amdgcn_cooperative_atomic_store_16x8B(global v2i* gaddr, v2i val)
{
  __builtin_amdgcn_cooperative_atomic_store_16x8B(gaddr, val, __ATOMIC_RELAXED, "");
}

// CHECK-LABEL: define dso_local <2 x i32> @test_amdgcn_cooperative_atomic_load_16x8B(
// CHECK-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p1(ptr addrspace(1) [[GADDR]], i32 0, metadata [[META6:![0-9]+]])
// CHECK-NEXT: ret <2 x i32> [[TMP0]]
//
v2i test_amdgcn_cooperative_atomic_load_16x8B(global v2i* gaddr)
{
  return __builtin_amdgcn_cooperative_atomic_load_16x8B(gaddr, __ATOMIC_RELAXED, "workgroup");
}

// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_8x16B(
// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], <4 x i32> noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p1(ptr addrspace(1) [[GADDR]], <4 x i32> [[VAL]], i32 0, metadata [[META7:![0-9]+]])
// CHECK-NEXT: ret void
//
void test_amdgcn_cooperative_atomic_store_8x16B(global v4i* gaddr, v4i val)
{
  __builtin_amdgcn_cooperative_atomic_store_8x16B(gaddr, val, __ATOMIC_RELAXED, "singlethread");
}

// CHECK-LABEL: define dso_local <4 x i32> @test_amdgcn_cooperative_atomic_load_8x16B(
// CHECK-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p1(ptr addrspace(1) [[GADDR]], i32 0, metadata [[META4]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v4i test_amdgcn_cooperative_atomic_load_8x16B(global v4i* gaddr)
{
  return __builtin_amdgcn_cooperative_atomic_load_8x16B(gaddr, __ATOMIC_RELAXED, "agent");
}

// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_32x4B_truncated(
// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], i64 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[VAL]] to i32
// CHECK-NEXT: tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p1(ptr addrspace(1) [[GADDR]], i32 [[CONV]], i32 0, metadata [[META4]])
// CHECK-NEXT: ret void
//
void test_amdgcn_cooperative_atomic_store_32x4B_truncated(global int* gaddr, long val)
{
  __builtin_amdgcn_cooperative_atomic_store_32x4B(gaddr, val, __ATOMIC_RELAXED, "agent");
}

// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_32x4B_extended(
// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], i8 noundef signext [[VAL:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[CONV:%.*]] = sext i8 [[VAL]] to i32
// CHECK-NEXT: tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p1(ptr addrspace(1) [[GADDR]], i32 [[CONV]], i32 0, metadata [[META4]])
// CHECK-NEXT: ret void
//
void test_amdgcn_cooperative_atomic_store_32x4B_extended(global int* gaddr, char val)
{
  __builtin_amdgcn_cooperative_atomic_store_32x4B(gaddr, val, __ATOMIC_RELAXED, "agent");
}

//.
// CHECK: [[META4]] = !{!"agent"}
// CHECK: [[META5]] = !{!""}
// CHECK: [[META6]] = !{!"workgroup"}
// CHECK: [[META7]] = !{!"singlethread"}
//.
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -verify -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s

// Sema diagnostics for the gfx1250 128B cooperative atomic builtins:
// non-literal syncscope, invalid atomic orderings, disallowed address
// space, and non-immediate ordering arguments.
typedef int v2i __attribute__((ext_vector_type(2)));
typedef int v4i __attribute__((ext_vector_type(4)));

void test_amdgcn_cooperative_atomic_store_32x4B(global int* gaddr, int val, const char* syncscope)
{
  __builtin_amdgcn_cooperative_atomic_store_32x4B(gaddr, val, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
}

int test_amdgcn_cooperative_atomic_load_32x4B(global int* gaddr, const char* syncscope)
{
  return __builtin_amdgcn_cooperative_atomic_load_32x4B(gaddr, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
}

void test_amdgcn_cooperative_atomic_store_16x8B(global v2i* gaddr, v2i val, const char* syncscope)
{
  __builtin_amdgcn_cooperative_atomic_store_16x8B(gaddr, val, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
}

v2i test_amdgcn_cooperative_atomic_load_16x8B(global v2i* gaddr, const char* syncscope)
{
  return __builtin_amdgcn_cooperative_atomic_load_16x8B(gaddr, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
}

void test_amdgcn_cooperative_atomic_store_8x16B(global v4i* gaddr, v4i val, const char* syncscope)
{
  __builtin_amdgcn_cooperative_atomic_store_8x16B(gaddr, val, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
}

v4i test_amdgcn_cooperative_atomic_load_8x16B(global v4i* gaddr, const char* syncscope)
{
  return __builtin_amdgcn_cooperative_atomic_load_8x16B(gaddr, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
}

v4i test_amdgcn_cooperative_atomic_load_8x16B_release(global v4i* gaddr)
{
  return __builtin_amdgcn_cooperative_atomic_load_8x16B(gaddr, __ATOMIC_RELEASE, ""); // expected-warning {{memory order argument to atomic operation is invalid}}
}

v4i test_amdgcn_cooperative_atomic_load_8x16B_acq_rel(global v4i* gaddr)
{
  return __builtin_amdgcn_cooperative_atomic_load_8x16B(gaddr, __ATOMIC_ACQ_REL, ""); // expected-warning {{memory order argument to atomic operation is invalid}}
}

void test_amdgcn_cooperative_atomic_store_32x4B__sharedptr(local int* addr, int val)
{
  __builtin_amdgcn_cooperative_atomic_store_32x4B(addr, val, __ATOMIC_RELAXED, ""); // expected-error {{cooperative atomic requires a global or generic pointer}}
}

void test_amdgcn_cooperative_atomic_store_32x4B__ordering_not_imm(local int* addr, int ord, int val)
{
  __builtin_amdgcn_cooperative_atomic_store_32x4B(addr, val, ord, ""); // expected-error {{argument to '__builtin_amdgcn_cooperative_atomic_store_32x4B' must be a constant integer}}
}

void test_amdgcn_cooperative_atomic_store_32x4B__acquire(int* addr, int ord, int val)
{
  __builtin_amdgcn_cooperative_atomic_store_32x4B(addr, val, __ATOMIC_ACQUIRE, ""); // expected-warning {{memory order argument to atomic operation is invalid}}
}

void test_amdgcn_cooperative_atomic_store_32x4B__acq_rel(int* addr, int ord, int val)
{
  __builtin_amdgcn_cooperative_atomic_store_32x4B(addr, val, __ATOMIC_ACQ_REL, ""); // expected-warning {{memory order argument to atomic operation is invalid}}
}

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3769,6 +3769,36 @@ def int_amdgcn_perm_pk16_b8_u4 : ClangBuiltin<"__builtin_amdgcn_perm_pk16_b8_u4"
37693769
DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_v2i32_ty],
37703770
[IntrNoMem, IntrSpeculatable]>;
37713771

3772+
// GFX12.5 128B cooperative atomics. The pointer operand is overloaded on
// address space; the atomic ordering travels as a C-ABI ordering-ID
// immediate and the syncscope as a metadata string.
class AMDGPUCooperativeAtomicStore<LLVMType Ty> : Intrinsic <
  [],
  [llvm_anyptr_ty,   // pointer to store to
   Ty,               // value to store
   llvm_i32_ty,      // C ABI Atomic Ordering ID
   llvm_metadata_ty], // syncscope
  [IntrWriteMem, WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<2>>,
   IntrNoCallback, IntrNoFree, IntrConvergent],
  "",
  [SDNPMemOperand, SDNPMayStore]
>;

class AMDGPUCooperativeAtomicLoad<LLVMType Ty> : Intrinsic <
  [Ty],              // loaded value
  [llvm_anyptr_ty,   // pointer to load from
   llvm_i32_ty,      // C ABI Atomic Ordering ID
   llvm_metadata_ty], // syncscope
  [IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
   IntrNoCallback, IntrNoFree, IntrConvergent],
  "",
  [SDNPMemOperand, SDNPMayLoad]
>;

// NxMB naming: N lanes each transferring M bytes (32x4B, 16x8B, 8x16B all
// total 128 bytes), with the per-lane element type chosen to match.
def int_amdgcn_cooperative_atomic_load_32x4B : AMDGPUCooperativeAtomicLoad<llvm_i32_ty>;
def int_amdgcn_cooperative_atomic_store_32x4B : AMDGPUCooperativeAtomicStore<llvm_i32_ty>;
def int_amdgcn_cooperative_atomic_load_16x8B : AMDGPUCooperativeAtomicLoad<llvm_v2i32_ty>;
def int_amdgcn_cooperative_atomic_store_16x8B : AMDGPUCooperativeAtomicStore<llvm_v2i32_ty>;
def int_amdgcn_cooperative_atomic_load_8x16B : AMDGPUCooperativeAtomicLoad<llvm_v4i32_ty>;
def int_amdgcn_cooperative_atomic_store_8x16B : AMDGPUCooperativeAtomicStore<llvm_v4i32_ty>;
3801+
37723802
//===----------------------------------------------------------------------===//
37733803
// Special Intrinsics for backend internal use only. No frontend
37743804
// should emit calls to these.

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1968,6 +1968,12 @@ def atomic_load_nonext_64 :
19681968
let MemoryVT = i64;
19691969
}
19701970

1971+
// 128-bit non-extending atomic load fragment, extending the existing
// 8/16/32/64-bit series for the 128B cooperative atomics.
def atomic_load_nonext_128 :
  PatFrag<(ops node:$ptr), (atomic_load_nonext node:$ptr)> {
  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
  let MemoryVT = i128;
}
1976+
19711977
def atomic_load_zext_8 :
19721978
PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> {
19731979
let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
@@ -2199,6 +2205,13 @@ def atomic_store_64 :
21992205
let MemoryVT = i64;
22002206
}
22012207

2208+
// 128-bit atomic store fragment, mirroring atomic_store_64 above.
def atomic_store_128 :
  PatFrag<(ops node:$val, node:$ptr),
          (atomic_store node:$val, node:$ptr)> {
  let IsAtomic = true;
  let MemoryVT = i128;
}
2214+
22022215
//===----------------------------------------------------------------------===//
22032216
// Selection DAG Pattern Support.
22042217
//

0 commit comments

Comments
 (0)