Skip to content

Commit 0e174a5

Browse files
committed
clang/AMDGPU: Emit atomicrmw for flat/global atomic min/max f64 builtins
1 parent: 94d04eb · commit: 0e174a5

File tree

2 files changed: +27 additions, -33 deletions

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 15 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -18655,32 +18655,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1865518655
Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
1865618656
return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
1865718657
}
18658-
case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18659-
case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18660-
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18661-
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
18662-
Intrinsic::ID IID;
18663-
llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
18664-
switch (BuiltinID) {
18665-
case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18666-
IID = Intrinsic::amdgcn_global_atomic_fmin;
18667-
break;
18668-
case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18669-
IID = Intrinsic::amdgcn_global_atomic_fmax;
18670-
break;
18671-
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18672-
IID = Intrinsic::amdgcn_flat_atomic_fmin;
18673-
break;
18674-
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18675-
IID = Intrinsic::amdgcn_flat_atomic_fmax;
18676-
break;
18677-
}
18678-
llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18679-
llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18680-
llvm::Function *F =
18681-
CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
18682-
return Builder.CreateCall(F, {Addr, Val});
18683-
}
1868418658
case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
1868518659
case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
1868618660
case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
@@ -19054,7 +19028,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1905419028
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1905519029
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
1905619030
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
19057-
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
19031+
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
19032+
case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
19033+
case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
19034+
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
19035+
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
1905819036
llvm::AtomicRMWInst::BinOp BinOp;
1905919037
switch (BuiltinID) {
1906019038
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19080,6 +19058,16 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1908019058
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1908119059
BinOp = llvm::AtomicRMWInst::FAdd;
1908219060
break;
19061+
case AMDGPU::BI__builtin_amdgcn_ds_fminf:
19062+
case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
19063+
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
19064+
BinOp = llvm::AtomicRMWInst::FMin;
19065+
break;
19066+
case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
19067+
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
19068+
case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
19069+
BinOp = llvm::AtomicRMWInst::FMax;
19070+
break;
1908319071
}
1908419072

1908519073
Address Ptr = CheckAtomicAlignment(*this, E);

clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,8 @@ void test_global_add_half2(__global half2 *addr, half2 x) {
2727
}
2828

2929
// CHECK-LABEL: test_global_global_min_f64
30-
// CHECK: call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}})
30+
// CHECK: = atomicrmw fmin ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
31+
3132
// GFX90A-LABEL: test_global_global_min_f64$local
3233
// GFX90A: global_atomic_min_f64
3334
void test_global_global_min_f64(__global double *addr, double x){
@@ -36,7 +37,8 @@ void test_global_global_min_f64(__global double *addr, double x){
3637
}
3738

3839
// CHECK-LABEL: test_global_max_f64
39-
// CHECK: call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}})
40+
// CHECK: = atomicrmw fmax ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
41+
4042
// GFX90A-LABEL: test_global_max_f64$local
4143
// GFX90A: global_atomic_max_f64
4244
void test_global_max_f64(__global double *addr, double x){
@@ -65,7 +67,8 @@ void test_flat_global_add_f64(__global double *addr, double x){
6567
}
6668

6769
// CHECK-LABEL: test_flat_min_flat_f64
68-
// CHECK: call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %{{.*}}, double %{{.*}})
70+
// CHECK: = atomicrmw fmin ptr {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
71+
6972
// GFX90A-LABEL: test_flat_min_flat_f64$local
7073
// GFX90A: flat_atomic_min_f64
7174
void test_flat_min_flat_f64(__generic double *addr, double x){
@@ -74,7 +77,8 @@ void test_flat_min_flat_f64(__generic double *addr, double x){
7477
}
7578

7679
// CHECK-LABEL: test_flat_global_min_f64
77-
// CHECK: call double @llvm.amdgcn.flat.atomic.fmin.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}})
80+
// CHECK: = atomicrmw fmin ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
81+
7882
// GFX90A: test_flat_global_min_f64$local
7983
// GFX90A: global_atomic_min_f64
8084
void test_flat_global_min_f64(__global double *addr, double x){
@@ -83,7 +87,8 @@ void test_flat_global_min_f64(__global double *addr, double x){
8387
}
8488

8589
// CHECK-LABEL: test_flat_max_flat_f64
86-
// CHECK: call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %{{.*}}, double %{{.*}})
90+
// CHECK: = atomicrmw fmax ptr {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
91+
8792
// GFX90A-LABEL: test_flat_max_flat_f64$local
8893
// GFX90A: flat_atomic_max_f64
8994
void test_flat_max_flat_f64(__generic double *addr, double x){
@@ -92,7 +97,8 @@ void test_flat_max_flat_f64(__generic double *addr, double x){
9297
}
9398

9499
// CHECK-LABEL: test_flat_global_max_f64
95-
// CHECK: call double @llvm.amdgcn.flat.atomic.fmax.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}})
100+
// CHECK: = atomicrmw fmax ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
101+
96102
// GFX90A-LABEL: test_flat_global_max_f64$local
97103
// GFX90A: global_atomic_max_f64
98104
void test_flat_global_max_f64(__global double *addr, double x){

0 commit comments

Comments
 (0)