Skip to content

Commit 761b3e2

Browse files
committed
[AMDGPU] Allow overload of __builtin_amdgcn_mov/update_dpp
We need to support 64-bit data types (the intrinsics already support them). Previously, floating-point arguments were also silently converted to integers; this is fixed as well.
1 parent b0a2546 commit 761b3e2

File tree

3 files changed

+71
-13
lines changed

3 files changed

+71
-13
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,8 +224,8 @@ TARGET_BUILTIN(__builtin_amdgcn_frexp_exph, "sh", "nc", "16-bit-insts")
224224
TARGET_BUILTIN(__builtin_amdgcn_fracth, "hh", "nc", "16-bit-insts")
225225
TARGET_BUILTIN(__builtin_amdgcn_classh, "bhi", "nc", "16-bit-insts")
226226
TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "WUi", "n", "s-memrealtime")
227-
TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nc", "dpp")
228-
TARGET_BUILTIN(__builtin_amdgcn_update_dpp, "iiiIiIiIiIb", "nc", "dpp")
227+
TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nct", "dpp")
228+
TARGET_BUILTIN(__builtin_amdgcn_update_dpp, "iiiIiIiIiIb", "nct", "dpp")
229229
TARGET_BUILTIN(__builtin_amdgcn_s_dcache_wb, "v", "n", "gfx8-insts")
230230
TARGET_BUILTIN(__builtin_amdgcn_perm, "UiUiUiUi", "nc", "gfx8-insts")
231231

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19038,15 +19038,27 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1903819038
ASTContext::GetBuiltinTypeError Error;
1903919039
getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
1904019040
assert(Error == ASTContext::GE_None && "Should not codegen an error");
19041+
llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
19042+
llvm::Type *IntTy = llvm::IntegerType::get(
19043+
Builder.getContext(), DataTy->getPrimitiveSizeInBits());
19044+
Function *F =
19045+
CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, IntTy);
19046+
assert(E->getNumArgs() == 5 || E->getNumArgs() == 6);
19047+
bool InsertOld = E->getNumArgs() == 5;
19048+
if (InsertOld)
19049+
Args.push_back(llvm::PoisonValue::get(IntTy));
1904119050
for (unsigned I = 0; I != E->getNumArgs(); ++I) {
19042-
Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
19051+
llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
19052+
llvm::Type *ExpTy =
19053+
F->getFunctionType()->getFunctionParamType(I + InsertOld);
19054+
if (V->getType() != ExpTy)
19055+
V = Builder.CreateTruncOrBitCast(V, ExpTy);
19056+
Args.push_back(V);
1904319057
}
19044-
assert(Args.size() == 5 || Args.size() == 6);
19045-
if (Args.size() == 5)
19046-
Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
19047-
Function *F =
19048-
CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
19049-
return Builder.CreateCall(F, Args);
19058+
llvm::Value *V = Builder.CreateCall(F, Args);
19059+
if (!DataTy->isIntegerTy())
19060+
V = Builder.CreateBitCast(V, DataTy);
19061+
return V;
1905019062
}
1905119063
case AMDGPU::BI__builtin_amdgcn_permlane16:
1905219064
case AMDGPU::BI__builtin_amdgcn_permlanex16:

clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,20 +102,66 @@ void test_s_dcache_wb()
102102
__builtin_amdgcn_s_dcache_wb();
103103
}
104104

105-
// CHECK-LABEL: @test_mov_dpp
105+
// CHECK-LABEL: @test_mov_dpp_int
106106
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %src, i32 0, i32 0, i32 0, i1 false)
107-
void test_mov_dpp(global int* out, int src)
107+
void test_mov_dpp_int(global int* out, int src)
108108
{
109109
*out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
110110
}
111111

112-
// CHECK-LABEL: @test_update_dpp
112+
// CHECK-LABEL: @test_mov_dpp_long
113+
// CHECK: %0 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %x, i32 257, i32 15, i32 15, i1 false)
114+
// CHECK-NEXT: store i64 %0,
115+
void test_mov_dpp_long(long x, global long *p) {
116+
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
117+
}
118+
119+
// CHECK-LABEL: @test_mov_dpp_float
120+
// CHECK: %0 = bitcast float %x to i32
121+
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
122+
// CHECK-NEXT: store i32 %1,
123+
void test_mov_dpp_float(float x, global float *p) {
124+
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
125+
}
126+
127+
// CHECK-LABEL: @test_mov_dpp_double
128+
// CHECK: %0 = bitcast double %x to i64
129+
// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %0, i32 257, i32 15, i32 15, i1 false)
130+
// CHECK-NEXT: store i64 %1,
131+
void test_mov_dpp_double(double x, global double *p) {
132+
*p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
133+
}
134+
135+
// CHECK-LABEL: @test_update_dpp_int
113136
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
114-
void test_update_dpp(global int* out, int arg1, int arg2)
137+
void test_update_dpp_int(global int* out, int arg1, int arg2)
115138
{
116139
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
117140
}
118141

142+
// CHECK-LABEL: @test_update_dpp_long
143+
// CHECK: %0 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %x, i32 257, i32 15, i32 15, i1 false)
144+
// CHECK-NEXT: store i64 %0,
145+
void test_update_dpp_long(long x, global long *p) {
146+
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
147+
}
148+
149+
// CHECK-LABEL: @test_update_dpp_float
150+
// CHECK: %0 = bitcast float %x to i32
151+
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
152+
// CHECK-NEXT: store i32 %1,
153+
void test_update_dpp_float(float x, global float *p) {
154+
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
155+
}
156+
157+
// CHECK-LABEL: @test_update_dpp_double
158+
// CHECK: %0 = bitcast double %x to i64
159+
// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %0, i64 %0, i32 257, i32 15, i32 15, i1 false)
160+
// CHECK-NEXT: store i64 %1,
161+
void test_update_dpp_double(double x, global double *p) {
162+
*p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
163+
}
164+
119165
// CHECK-LABEL: @test_ds_fadd
120166
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
121167
// CHECK: atomicrmw volatile fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

0 commit comments

Comments
 (0)