Skip to content

Commit 3a0a8f4

Browse files
committed
clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins
1 parent 926b7d4 commit 3a0a8f4

File tree

3 files changed: +12 −14 lines changed

3 files changed: +12 −14 lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 6 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -18643,10 +18643,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1864318643
}
1864418644
case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
1864518645
case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18646-
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1864718646
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18648-
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18649-
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: {
18647+
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
1865018648
Intrinsic::ID IID;
1865118649
llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
1865218650
switch (BuiltinID) {
@@ -18656,19 +18654,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1865618654
case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
1865718655
IID = Intrinsic::amdgcn_global_atomic_fmax;
1865818656
break;
18659-
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
18660-
IID = Intrinsic::amdgcn_flat_atomic_fadd;
18661-
break;
1866218657
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
1866318658
IID = Intrinsic::amdgcn_flat_atomic_fmin;
1866418659
break;
1866518660
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
1866618661
IID = Intrinsic::amdgcn_flat_atomic_fmax;
1866718662
break;
18668-
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18669-
ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18670-
IID = Intrinsic::amdgcn_flat_atomic_fadd;
18671-
break;
1867218663
}
1867318664
llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
1867418665
llvm::Value *Val = EmitScalarExpr(E->getArg(1));
@@ -19063,7 +19054,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1906319054
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
1906419055
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1906519056
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
19066-
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
19057+
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
19058+
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
19059+
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: {
1906719060
llvm::AtomicRMWInst::BinOp BinOp;
1906819061
switch (BuiltinID) {
1906919062
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19083,6 +19076,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1908319076
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
1908419077
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1908519078
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
19079+
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
19080+
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1908619081
BinOp = llvm::AtomicRMWInst::FAdd;
1908719082
break;
1908819083
case AMDGPU::BI__builtin_amdgcn_ds_fminf:

clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ void test_global_max_f64(__global double *addr, double x){
4545
}
4646

4747
// CHECK-LABEL: test_flat_add_local_f64
48-
// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %{{.*}}, double %{{.*}})
48+
// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8{{$}}
49+
4950
// GFX90A-LABEL: test_flat_add_local_f64$local
5051
// GFX90A: ds_add_rtn_f64
5152
void test_flat_add_local_f64(__local double *addr, double x){
@@ -54,7 +55,8 @@ void test_flat_add_local_f64(__local double *addr, double x){
5455
}
5556

5657
// CHECK-LABEL: test_flat_global_add_f64
57-
// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}})
58+
// CHECK: = atomicrmw fadd ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
59+
5860
// GFX90A-LABEL: test_flat_global_add_f64$local
5961
// GFX90A: global_atomic_add_f64
6062
void test_flat_global_add_f64(__global double *addr, double x){

clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ typedef half __attribute__((ext_vector_type(2))) half2;
1010
typedef short __attribute__((ext_vector_type(2))) short2;
1111

1212
// CHECK-LABEL: test_flat_add_f32
13-
// CHECK: call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %{{.*}}, float %{{.*}})
13+
// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, float %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
14+
1415
// GFX940-LABEL: test_flat_add_f32
1516
// GFX940: flat_atomic_add_f32
1617
half2 test_flat_add_f32(__generic float *addr, float x) {

0 commit comments

Comments
 (0)