Skip to content

Commit c82bff3

Browse files
committed
Exposed some AMDGCN raw buffer atomic intrinsics to clang.
1 parent 4355356 commit c82bff3

File tree

5 files changed

+163
-0
lines changed

5 files changed

+163
-0
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,15 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fmax_f64, "dd*0d", "t", "gfx90a-inst
252252
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f64, "dd*3d", "t", "gfx90a-insts")
253253
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f32, "ff*3f", "t", "gfx8-insts")
254254

255+
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32, "iiQbiiIi", "t", "gfx90a-insts")
256+
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32, "ffQbiiIi", "t", "gfx90a-insts")
257+
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64, "ddQbiiIi", "t", "gfx90a-insts")
258+
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16, "V2hV2hQbiiIi", "t", "gfx90a-insts")
259+
260+
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32, "ffQbiiIi", "t", "gfx90a-insts")
261+
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64, "ddQbiiIi", "t", "gfx90a-insts")
262+
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16, "V2hV2hQbiiIi", "t", "gfx90a-insts")
263+
255264
TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f32, "ff*0f", "t", "gfx940-insts")
256265
TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2f16, "V2hV2h*0V2h", "t", "atomic-flat-pk-add-16-insts")
257266
TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "atomic-flat-pk-add-16-insts")

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1439,6 +1439,52 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
14391439
F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
14401440
EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
14411441
}
1442+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
1443+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
1444+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64:
1445+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
1446+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
1447+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
1448+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16: {
1449+
llvm::Type *RetTy;
1450+
switch (BuiltinID) {
1451+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
1452+
RetTy = Int32Ty;
1453+
break;
1454+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
1455+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
1456+
RetTy = FloatTy;
1457+
break;
1458+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64:
1459+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
1460+
RetTy = DoubleTy;
1461+
break;
1462+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
1463+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16:
1464+
RetTy = llvm::FixedVectorType::get(HalfTy, 2);
1465+
}
1466+
unsigned IID;
1467+
switch (BuiltinID) {
1468+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
1469+
IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_add;
1470+
break;
1471+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
1472+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64:
1473+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
1474+
IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd;
1475+
break;
1476+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
1477+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
1478+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16:
1479+
IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax;
1480+
break;
1481+
}
1482+
llvm::Function *F = CGM.getIntrinsic(IID, RetTy);
1483+
return Builder.CreateCall(
1484+
F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
1485+
EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)),
1486+
EmitScalarExpr(E->getArg(4))});
1487+
}
14421488
case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
14431489
return emitBuiltinWithOneOverloadedType<2>(
14441490
*this, E, Intrinsic::amdgcn_s_prefetch_data);
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -S -verify=gfx908,expected -o - %s
2+
3+
// REQUIRES: amdgpu-registered-target
4+
5+
typedef half __attribute__((ext_vector_type(2))) float16x2_t;
6+
7+
void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, double f64, float16x2_t v2f16, int offset, int soffset) {
8+
i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32' needs target feature gfx90a-insts}}
9+
f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32' needs target feature gfx90a-insts}}
10+
f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64(f64, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64' needs target feature gfx90a-insts}}
11+
v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16' needs target feature gfx90a-insts}}
12+
13+
f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' needs target feature gfx90a-insts}}
14+
f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' needs target feature gfx90a-insts}}
15+
v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16(v2f16, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16' needs target feature gfx90a-insts}}
16+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -S -verify=gfx90a,expected -o - %s
2+
3+
// REQUIRES: amdgpu-registered-target
4+
5+
typedef half __attribute__((ext_vector_type(2))) float16x2_t;
6+
7+
void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, double f64, float16x2_t v2f16, int offset, int soffset, int x) {
8+
i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32' must be a constant integer}}
9+
f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32' must be a constant integer}}
10+
f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64' must be a constant integer}}
11+
v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16' must be a constant integer}}
12+
13+
f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' must be a constant integer}}
14+
f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' must be a constant integer}}
15+
v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16(v2f16, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16' must be a constant integer}}
16+
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -emit-llvm -o - %s | FileCheck %s
3+
4+
// REQUIRES: amdgpu-registered-target
5+
6+
typedef half __attribute__((ext_vector_type(2))) float16x2_t;
7+
8+
// CHECK-LABEL: define dso_local i32 @test_atomic_add_i32(
9+
// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], i32 noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
10+
// CHECK-NEXT: [[ENTRY:.*:]]
11+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
12+
// CHECK-NEXT: ret i32 [[TMP0]]
13+
//
14+
int test_atomic_add_i32(__amdgpu_buffer_rsrc_t rsrc, int x, int offset, int soffset) {
15+
return __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(x, rsrc, offset, soffset, 0);
16+
}
17+
18+
// CHECK-LABEL: define dso_local float @test_atomic_fadd_f32(
19+
// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], float noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
20+
// CHECK-NEXT: [[ENTRY:.*:]]
21+
// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
22+
// CHECK-NEXT: ret float [[TMP0]]
23+
//
24+
float test_atomic_fadd_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) {
25+
return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(x, rsrc, offset, soffset, 0);
26+
}
27+
28+
// CHECK-LABEL: define dso_local double @test_atomic_fadd_f64(
29+
// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], double noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
30+
// CHECK-NEXT: [[ENTRY:.*:]]
31+
// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
32+
// CHECK-NEXT: ret double [[TMP0]]
33+
//
34+
double test_atomic_fadd_f64(__amdgpu_buffer_rsrc_t rsrc, double x, int offset, int soffset) {
35+
return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64(x, rsrc, offset, soffset, 0);
36+
}
37+
38+
// CHECK-LABEL: define dso_local <2 x half> @test_atomic_fadd_v2f16(
39+
// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], <2 x half> noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
40+
// CHECK-NEXT: [[ENTRY:.*:]]
41+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
42+
// CHECK-NEXT: ret <2 x half> [[TMP0]]
43+
//
44+
float16x2_t test_atomic_fadd_v2f16(__amdgpu_buffer_rsrc_t rsrc, float16x2_t x, int offset, int soffset) {
45+
return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(x, rsrc, offset, soffset, 0);
46+
}
47+
48+
// CHECK-LABEL: define dso_local float @test_atomic_fmax_f32(
49+
// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], float noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
50+
// CHECK-NEXT: [[ENTRY:.*:]]
51+
// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
52+
// CHECK-NEXT: ret float [[TMP0]]
53+
//
54+
float test_atomic_fmax_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) {
55+
return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(x, rsrc, offset, soffset, 0);
56+
}
57+
58+
// CHECK-LABEL: define dso_local double @test_atomic_fmax_f64(
59+
// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], double noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
60+
// CHECK-NEXT: [[ENTRY:.*:]]
61+
// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
62+
// CHECK-NEXT: ret double [[TMP0]]
63+
//
64+
double test_atomic_fmax_f64(__amdgpu_buffer_rsrc_t rsrc, double x, int offset, int soffset) {
65+
return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(x, rsrc, offset, soffset, 0);
66+
}
67+
68+
// CHECK-LABEL: define dso_local <2 x half> @test_atomic_fmax_v2f16(
69+
// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], <2 x half> noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
70+
// CHECK-NEXT: [[ENTRY:.*:]]
71+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.v2f16(<2 x half> [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
72+
// CHECK-NEXT: ret <2 x half> [[TMP0]]
73+
//
74+
float16x2_t test_atomic_fmax_v2f16(__amdgpu_buffer_rsrc_t rsrc, float16x2_t x, int offset, int soffset) {
75+
return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16(x, rsrc, offset, soffset, 0);
76+
}

0 commit comments

Comments
 (0)