Skip to content

Commit 59e1b30

Browse files
committed
Refined choice of target features.
1 parent c82bff3 commit 59e1b30

11 files changed

+102 additions, -124 deletions (lines changed)

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,13 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2UiQbiiIi", "n")
163163
BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n")
164164
BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n")
165165

166+
BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32, "iiQbiiIi", "t")
167+
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32, "ffQbiiIi", "t", "atomic-fadd-rtn-insts")
168+
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16, "V2hV2hQbiiIi", "t", "atomic-buffer-global-pk-add-f16-insts")
169+
170+
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32, "ffQbiiIi", "t", "atomic-fmin-fmax-global-f32")
171+
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64, "ddQbiiIi", "t", "atomic-fmin-fmax-global-f64")
172+
166173
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "t", "vmem-to-lds-load-insts")
167174
TARGET_BUILTIN(__builtin_amdgcn_struct_ptr_buffer_load_lds, "vQbv*3IUiiiiIiIi", "t", "vmem-to-lds-load-insts")
168175

@@ -252,15 +259,6 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fmax_f64, "dd*0d", "t", "gfx90a-inst
252259
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f64, "dd*3d", "t", "gfx90a-insts")
253260
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f32, "ff*3f", "t", "gfx8-insts")
254261

255-
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32, "iiQbiiIi", "t", "gfx90a-insts")
256-
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32, "ffQbiiIi", "t", "gfx90a-insts")
257-
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64, "ddQbiiIi", "t", "gfx90a-insts")
258-
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16, "V2hV2hQbiiIi", "t", "gfx90a-insts")
259-
260-
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32, "ffQbiiIi", "t", "gfx90a-insts")
261-
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64, "ddQbiiIi", "t", "gfx90a-insts")
262-
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16, "V2hV2hQbiiIi", "t", "gfx90a-insts")
263-
264262
TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f32, "ff*0f", "t", "gfx940-insts")
265263
TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2f16, "V2hV2h*0V2h", "t", "atomic-flat-pk-add-16-insts")
266264
TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "atomic-flat-pk-add-16-insts")

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1441,11 +1441,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
14411441
}
14421442
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
14431443
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
1444-
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64:
14451444
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
14461445
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
1447-
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
1448-
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16: {
1446+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64: {
14491447
llvm::Type *RetTy;
14501448
switch (BuiltinID) {
14511449
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
@@ -1455,27 +1453,24 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
14551453
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
14561454
RetTy = FloatTy;
14571455
break;
1458-
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64:
14591456
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
14601457
RetTy = DoubleTy;
14611458
break;
14621459
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
1463-
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16:
14641460
RetTy = llvm::FixedVectorType::get(HalfTy, 2);
1461+
break;
14651462
}
14661463
unsigned IID;
14671464
switch (BuiltinID) {
14681465
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32:
14691466
IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_add;
14701467
break;
14711468
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32:
1472-
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f64:
14731469
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16:
14741470
IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd;
14751471
break;
14761472
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32:
14771473
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64:
1478-
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_v2f16:
14791474
IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax;
14801475
break;
14811476
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -S -verify=gfx90a,expected -o - %s
2+
// REQUIRES: amdgpu-registered-target
3+
4+
typedef half __attribute__((ext_vector_type(2))) float16x2_t;
5+
6+
void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, int i32, float f32, float16x2_t v2f16, int offset, int soffset, int x) {
7+
i32 = __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(i32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_add_i32' must be a constant integer}}
8+
f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32' must be a constant integer}}
9+
v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16' must be a constant integer}}
10+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -S -verify=gfx908,expected -o - %s
2+
// REQUIRES: amdgpu-registered-target
3+
4+
typedef half __attribute__((ext_vector_type(2))) float16x2_t;
5+
6+
void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, float f32, float16x2_t v2f16, int offset, int soffset) {
7+
f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32' needs target feature atomic-fadd-rtn-insts}}
8+
v2f16 = __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(v2f16, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts}}
9+
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -emit-llvm -o - %s | FileCheck %s
3+
4+
// REQUIRES: amdgpu-registered-target
5+
6+
typedef half __attribute__((ext_vector_type(2))) float16x2_t;
7+
8+
// CHECK-LABEL: define dso_local i32 @test_atomic_add_i32(
9+
// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], i32 noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
10+
// CHECK-NEXT: [[ENTRY:.*:]]
11+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
12+
// CHECK-NEXT: ret i32 [[TMP0]]
13+
//
14+
int test_atomic_add_i32(__amdgpu_buffer_rsrc_t rsrc, int x, int offset, int soffset) {
15+
return __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(x, rsrc, offset, soffset, 0);
16+
}
17+
18+
// CHECK-LABEL: define dso_local float @test_atomic_fadd_f32(
19+
// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], float noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
20+
// CHECK-NEXT: [[ENTRY:.*:]]
21+
// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
22+
// CHECK-NEXT: ret float [[TMP0]]
23+
//
24+
float test_atomic_fadd_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) {
25+
return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(x, rsrc, offset, soffset, 0);
26+
}
27+
28+
// CHECK-LABEL: define dso_local <2 x half> @test_atomic_fadd_v2f16(
29+
// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], <2 x half> noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
30+
// CHECK-NEXT: [[ENTRY:.*:]]
31+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
32+
// CHECK-NEXT: ret <2 x half> [[TMP0]]
33+
//
34+
float16x2_t test_atomic_fadd_v2f16(__amdgpu_buffer_rsrc_t rsrc, float16x2_t x, int offset, int soffset) {
35+
return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(x, rsrc, offset, soffset, 0);
36+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -S -verify=expected -o - %s
2+
// REQUIRES: amdgpu-registered-target
3+
4+
void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, float f32, double f64, int offset, int soffset, int x) {
5+
f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' must be a constant integer}}
6+
f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, x); // expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' must be a constant integer}}
7+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -verify=expected -o - %s
2+
// REQUIRES: amdgpu-registered-target
3+
4+
void test_raw_ptr_atomics(__amdgpu_buffer_rsrc_t rsrc, float f32, double f64, int offset, int soffset) {
5+
f32 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(f32, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32' needs target feature atomic-fmin-fmax-global-f32}}
6+
f64 = __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(f64, rsrc, offset, soffset, 0); // expected-error{{'__builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64' needs target feature atomic-fmin-fmax-global-f64}}
7+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-feature +atomic-fmin-fmax-global-f32 -target-feature +atomic-fmin-fmax-global-f64 -emit-llvm -o - %s | FileCheck %s
3+
4+
// REQUIRES: amdgpu-registered-target
5+
6+
// CHECK-LABEL: define dso_local float @test_atomic_fmax_f32(
7+
// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], float noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
8+
// CHECK-NEXT: [[ENTRY:.*:]]
9+
// CHECK-NEXT: [[TMP0:%.*]] = tail call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
10+
// CHECK-NEXT: ret float [[TMP0]]
11+
//
12+
float test_atomic_fmax_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) {
13+
return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(x, rsrc, offset, soffset, 0);
14+
}
15+
16+
// CHECK-LABEL: define dso_local double @test_atomic_fmax_f64(
17+
// CHECK-SAME: ptr addrspace(8) captures(none) [[RSRC:%.*]], double noundef [[X:%.*]], i32 noundef [[OFFSET:%.*]], i32 noundef [[SOFFSET:%.*]]) local_unnamed_addr #[[ATTR0]] {
18+
// CHECK-NEXT: [[ENTRY:.*:]]
19+
// CHECK-NEXT: [[TMP0:%.*]] = tail call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double [[X]], ptr addrspace(8) [[RSRC]], i32 [[OFFSET]], i32 [[SOFFSET]], i32 0)
20+
// CHECK-NEXT: ret double [[TMP0]]
21+
//
22+
double test_atomic_fmax_f64(__amdgpu_buffer_rsrc_t rsrc, double x, int offset, int soffset) {
23+
return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(x, rsrc, offset, soffset, 0);
24+
}

clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx908-target-err.cl

Lines changed: 0 additions & 16 deletions
This file was deleted.

clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-atomics-gfx90a-err.cl

Lines changed: 0 additions & 16 deletions
This file was deleted.

0 commit comments

Comments
 (0)