Skip to content

Commit 0a77e15

Browse files
authored
[CIR][HIP] Use GlobalView to access stub (#1957)
1 parent d69ce8a commit 0a77e15

File tree

2 files changed

+46
-11
lines changed

2 files changed

+46
-11
lines changed

clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -265,12 +265,20 @@ void CIRGenNVCUDARuntime::emitDeviceStubBodyNew(CIRGenFunction &cgf,
265265

266266
void CIRGenNVCUDARuntime::emitDeviceStub(CIRGenFunction &cgf, cir::FuncOp fn,
267267
FunctionArgList &args) {
268+
268269
if (auto globalOp =
269270
llvm::dyn_cast<cir::GlobalOp>(KernelHandles[fn.getSymName()])) {
270-
auto symbol = mlir::FlatSymbolRefAttr::get(fn.getSymNameAttr());
271-
// Set the initializer for the global
272-
cgm.setInitializer(globalOp, symbol);
271+
auto &builder = cgm.getBuilder();
272+
auto fnPtrTy = globalOp.getSymType();
273+
auto sym = mlir::FlatSymbolRefAttr::get(fn.getSymNameAttr());
274+
auto gv = cir::GlobalViewAttr::get(fnPtrTy, sym);
275+
276+
globalOp->setAttr("initial_value", gv);
277+
globalOp->removeAttr("sym_visibility");
278+
globalOp->setAttr("alignment", builder.getI64IntegerAttr(
279+
cgm.getPointerAlign().getQuantity()));
273280
}
281+
274282
// CUDA 9.0 changed the way to launch kernels.
275283
if (CudaFeatureEnabled(cgm.getTarget().getSDKVersion(),
276284
CudaFeature::CUDA_USES_NEW_LAUNCH) ||
@@ -322,12 +330,11 @@ mlir::Operation *CIRGenNVCUDARuntime::getKernelHandle(cir::FuncOp fn,
322330
cgm, fn->getLoc(), globalName,
323331
builder.getPointerTo(fn.getFunctionType()), true,
324332
cir::AddressSpace::Default,
325-
/*insertPoint=*/nullptr, fn.getLinkage());
333+
/*insertPoint=*/nullptr);
326334
});
327335

328336
globalOp->setAttr("alignment", builder.getI64IntegerAttr(
329337
cgm.getPointerAlign().getQuantity()));
330-
globalOp->setAttr("visibility", fn->getAttr("sym_visibility"));
331338

332339
// Store references
333340
KernelHandles[fn.getSymName()] = globalOp;

clang/test/CIR/CodeGen/HIP/simple.cpp

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,15 @@
55
// RUN: -emit-cir %s -o %t.cir
66
// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
77

8-
// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
8+
// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
99
// RUN: -fcuda-is-device -fhip-new-launch-api \
1010
// RUN: -emit-cir %s -o %t.cir
1111
// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
12+
//
13+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
14+
// RUN: -x hip -emit-llvm -fhip-new-launch-api \
15+
// RUN: %s -o %t.ll
16+
// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
1217

1318
// Attribute for global_fn
1419
// CIR-HOST: [[Kernel:#[a-zA-Z_0-9]+]] = {{.*}}#cir.cu.kernel_name<_Z9global_fni>{{.*}}
@@ -25,14 +30,21 @@ __device__ void device_fn(int* a, double b, float c) {}
2530
__global__ void global_fn(int a) {}
2631
// CIR-DEVICE: @_Z9global_fni
2732

28-
// CIR-HOST: cir.alloca {{.*}}"kernel_args"
33+
// CIR-HOST: @_Z24__device_stub__global_fni{{.*}}extra([[Kernel]])
34+
// CIR-HOST: %[[#CIRKernelArgs:]] = cir.alloca {{.*}}"kernel_args"
35+
// CIR-HOST: %[[#Decayed:]] = cir.cast array_to_ptrdecay %[[#CIRKernelArgs]]
2936
// CIR-HOST: cir.call @__hipPopCallConfiguration
30-
31-
// Host access the global stub instead of the functiond evice stub.
32-
// The stub has the mangled name of the function
33-
// CIR-HOST: cir.get_global @_Z9global_fni
37+
// CIR-HOST: cir.get_global @_Z9global_fni : !cir.ptr<!cir.ptr<!cir.func<(!s32i)>>>
3438
// CIR-HOST: cir.call @hipLaunchKernel
3539

40+
// LLVM-HOST: void @_Z24__device_stub__global_fni
41+
// LLVM-HOST: %[[#KernelArgs:]] = alloca [1 x ptr], i64 1, align 16
42+
// LLVM-HOST: %[[#GEP1:]] = getelementptr ptr, ptr %[[#KernelArgs]], i32 0
43+
// LLVM-HOST: %[[#GEP2:]] = getelementptr [1 x ptr], ptr %[[#KernelArgs]], i32 0, i64 0
44+
// LLVM-HOST: call i32 @__hipPopCallConfiguration
45+
// LLVM-HOST: call i32 @hipLaunchKernel(ptr @_Z9global_fni
46+
47+
3648
int main() {
3749
global_fn<<<1, 1>>>(1);
3850
}
@@ -49,4 +61,20 @@ int main() {
4961
// CIR-HOST: cir.call @_Z24__device_stub__global_fni([[Arg]])
5062
// CIR-HOST: }
5163

64+
// LLVM-HOST: define dso_local i32 @main
65+
// LLVM-HOST: alloca %struct.dim3
66+
// LLVM-HOST: alloca %struct.dim3
67+
// LLVM-HOST: call void @_ZN4dim3C1Ejjj
68+
// LLVM-HOST: call void @_ZN4dim3C1Ejjj
69+
// LLVM-HOST: %[[#ConfigOK:]] = call i32 @__hipPushCallConfiguration
70+
// LLVM-HOST: %[[#ConfigCond:]] = icmp ne i32 %[[#ConfigOK]], 0
71+
// LLVM-HOST: br i1 %[[#ConfigCond]], label %[[#Good:]], label %[[#Bad:]]
72+
// LLVM-HOST: [[#Good]]:
73+
// LLVM-HOST: br label %[[#End:]]
74+
// LLVM-HOST: [[#Bad]]:
75+
// LLVM-HOST: call void @_Z24__device_stub__global_fni(i32 1)
76+
// LLVM-HOST: br label %[[#End:]]
77+
// LLVM-HOST: [[#End]]:
78+
// LLVM-HOST: %[[#]] = load i32
79+
// LLVM-HOST: ret i32
5280

0 commit comments

Comments
 (0)