55// RUN: -emit-cir %s -o %t.cir
66// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
77
8- // RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
8+ // RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
99// RUN: -fcuda-is-device -fhip-new-launch-api \
1010// RUN: -emit-cir %s -o %t.cir
1111// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
12+ //
13+ // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
14+ // RUN: -x hip -emit-llvm -fhip-new-launch-api \
15+ // RUN: %s -o %t.ll
16+ // RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
1217
1318// Attribute for global_fn
1419// CIR-HOST: [[Kernel:#[a-zA-Z_0-9]+]] = {{.*}}#cir.cu.kernel_name<_Z9global_fni>{{.*}}
@@ -25,14 +30,21 @@ __device__ void device_fn(int* a, double b, float c) {}
2530__global__ void global_fn (int a) {}
2631// CIR-DEVICE: @_Z9global_fni
2732
28- // CIR-HOST: cir.alloca {{.*}}"kernel_args"
33+ // CIR-HOST: @_Z24__device_stub__global_fni{{.*}}extra([[Kernel]])
34+ // CIR-HOST: %[[#CIRKernelArgs:]] = cir.alloca {{.*}}"kernel_args"
35+ // CIR-HOST: %[[#Decayed:]] = cir.cast array_to_ptrdecay %[[#CIRKernelArgs]]
2936// CIR-HOST: cir.call @__hipPopCallConfiguration
30-
31- // Host access the global stub instead of the functiond evice stub.
32- // The stub has the mangled name of the function
33- // CIR-HOST: cir.get_global @_Z9global_fni
37+ // CIR-HOST: cir.get_global @_Z9global_fni : !cir.ptr<!cir.ptr<!cir.func<(!s32i)>>>
3438// CIR-HOST: cir.call @hipLaunchKernel
3539
40+ // LLVM-HOST: void @_Z24__device_stub__global_fni
41+ // LLVM-HOST: %[[#KernelArgs:]] = alloca [1 x ptr], i64 1, align 16
42+ // LLVM-HOST: %[[#GEP1:]] = getelementptr ptr, ptr %[[#KernelArgs]], i32 0
43+ // LLVM-HOST: %[[#GEP2:]] = getelementptr [1 x ptr], ptr %[[#KernelArgs]], i32 0, i64 0
44+ // LLVM-HOST: call i32 @__hipPopCallConfiguration
45+ // LLVM-HOST: call i32 @hipLaunchKernel(ptr @_Z9global_fni
46+
47+
3648int main () {
3749 global_fn<<<1 , 1 >>>(1 );
3850}
@@ -49,4 +61,20 @@ int main() {
4961// CIR-HOST: cir.call @_Z24__device_stub__global_fni([[Arg]])
5062// CIR-HOST: }
5163
64+ // LLVM-HOST: define dso_local i32 @main
65+ // LLVM-HOST: alloca %struct.dim3
66+ // LLVM-HOST: alloca %struct.dim3
67+ // LLVM-HOST: call void @_ZN4dim3C1Ejjj
68+ // LLVM-HOST: call void @_ZN4dim3C1Ejjj
69+ // LLVM-HOST: %[[#ConfigOK:]] = call i32 @__hipPushCallConfiguration
70+ // LLVM-HOST: %[[#ConfigCond:]] = icmp ne i32 %[[#ConfigOK]], 0
71+ // LLVM-HOST: br i1 %[[#ConfigCond]], label %[[#Good:]], label %[[#Bad:]]
72+ // LLVM-HOST: [[#Good]]:
73+ // LLVM-HOST: br label %[[#End:]]
74+ // LLVM-HOST: [[#Bad]]:
75+ // LLVM-HOST: call void @_Z24__device_stub__global_fni(i32 1)
76+ // LLVM-HOST: br label %[[#End:]]
77+ // LLVM-HOST: [[#End]]:
78+ // LLVM-HOST: %[[#]] = load i32
79+ // LLVM-HOST: ret i32
5280
0 commit comments