1
1
#include "../Inputs/cuda.h"
2
2
3
- // TODO: host build is currently crashing.
4
- // RUN-DISABLE: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
5
- // RUN-DISABLE: -x cuda -emit-cir -target-sdk-version=12.3 \
6
- // RUN-DISABLE: %s -o %t.cir
7
- // RUN-DISABLE: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
3
+ // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
4
+ // RUN: -x cuda -emit-cir -target-sdk-version=12.3 \
5
+ // RUN: %s -o %t.cir
6
+ // RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
8
7
9
8
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
10
9
// RUN: -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
11
10
// RUN: %s -o %t.cir
12
11
// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
13
12
13
+ // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
14
+ // RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \
15
+ // RUN: %s -o %t.ll
16
+ // RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
17
+
18
+ // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
19
+ // RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
20
+ // RUN: %s -o %t.ll
21
+ // RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
22
+
23
+ // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
24
+ // RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \
25
+ // RUN: %s -o %t.ll
26
+ // RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
27
+
28
+ // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
29
+ // RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
30
+ // RUN: %s -o %t.ll
31
+ // RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
32
+
14
33
// Attribute for global_fn
15
34
// CIR-HOST: [[Kernel:#[a-zA-Z_0-9]+]] = {{.*}}#cir.cu.kernel_name<_Z9global_fni>{{.*}}
16
35
@@ -25,6 +44,7 @@ __device__ void device_fn(int* a, double b, float c) {}
25
44
// Empty kernel under test. The surrounding check lines match it by its
// mangled name _Z9global_fni, so the name and the single int parameter
// must stay as written.
__global__ void global_fn (int a) {}
26
45
// CIR-DEVICE: @_Z9global_fni({{.*}} cc(ptx_kernel)
27
46
// LLVM-DEVICE: define dso_local ptx_kernel void @_Z9global_fni
47
+ // OGCG-DEVICE: define dso_local ptx_kernel void @_Z9global_fni
28
48
29
49
// Check for device stub emission.
30
50
@@ -38,10 +58,17 @@ __global__ void global_fn(int a) {}
38
58
// LLVM-HOST: void @_Z24__device_stub__global_fni
39
59
// LLVM-HOST: %[[#KernelArgs:]] = alloca [1 x ptr], i64 1, align 16
40
60
// LLVM-HOST: %[[#GEP1:]] = getelementptr ptr, ptr %[[#KernelArgs]], i32 0
41
- // LLVM-HOST: %[[#GEP2:]] = getelementptr ptr, ptr %[[#GEP1]] , i64 0
61
+ // LLVM-HOST: %[[#GEP2:]] = getelementptr [1 x ptr], ptr %[[#KernelArgs]], i32 0, i64 0
42
62
// LLVM-HOST: call i32 @__cudaPopCallConfiguration
43
63
// LLVM-HOST: call i32 @cudaLaunchKernel(ptr @_Z24__device_stub__global_fni
44
64
65
+ // OGCG-HOST: void @_Z24__device_stub__global_fni
66
+ // OGCG-HOST: %kernel_args = alloca ptr, i64 1, align 16
67
+ // OGCG-HOST: getelementptr ptr, ptr %kernel_args, i32 0
68
+ // OGCG-HOST: call i32 @__cudaPopCallConfiguration
69
+ // OGCG-HOST: call noundef i32 @cudaLaunchKernel(ptr noundef @_Z24__device_stub__global_fni
70
+
71
+
45
72
// Launches global_fn once with a <<<1, 1>>> configuration and the literal
// argument 1. The host-side check lines that follow match the call sequence
// emitted for this launch, so the launch expression should not be altered.
int main () {
46
73
global_fn<<<1 , 1 >>> (1 );
47
74
}
@@ -63,10 +90,29 @@ int main() {
63
90
// LLVM-HOST: alloca %struct.dim3
64
91
// LLVM-HOST: call void @_ZN4dim3C1Ejjj
65
92
// LLVM-HOST: call void @_ZN4dim3C1Ejjj
66
- // LLVM-HOST: [[LLVMConfigOK:%[0-9]+]] = call i32 @__cudaPushCallConfiguration
67
- // LLVM-HOST: br [[LLVMConfigOK]], label %[[#Good:]], label [[#Bad:]]
93
+ // LLVM-HOST: %[[#ConfigOK:]] = call i32 @__cudaPushCallConfiguration
94
+ // LLVM-HOST: %[[#ConfigCond:]] = icmp ne i32 %[[#ConfigOK]], 0
95
+ // LLVM-HOST: br i1 %[[#ConfigCond]], label %[[#Good:]], label %[[#Bad:]]
68
96
// LLVM-HOST: [[#Good]]:
69
- // LLVM-HOST: br label [[#End:]]
97
+ // LLVM-HOST: br label %[[#End:]]
70
98
// LLVM-HOST: [[#Bad]]:
71
- // LLVM-HOST: call void @_Z24__device_stub__global_fni
72
- // LLVM-HOST: br label [[#End]]
99
+ // LLVM-HOST: call void @_Z24__device_stub__global_fni(i32 1)
100
+ // LLVM-HOST: br label %[[#End:]]
101
+ // LLVM-HOST: [[#End]]:
102
+ // LLVM-HOST: %[[#]] = load i32
103
+ // LLVM-HOST: ret i32
104
+
105
+ // OGCG-HOST: define dso_local noundef i32 @main
106
+ // OGCG-HOST: alloca %struct.dim3, align 4
107
+ // OGCG-HOST: alloca %struct.dim3, align 4
108
+ // OGCG-HOST: call void @_ZN4dim3C1Ejjj
109
+ // OGCG-HOST: call void @_ZN4dim3C1Ejjj
110
+ // OGCG-HOST: %call = call i32 @__cudaPushCallConfiguration
111
+ // OGCG-HOST: %tobool = icmp ne i32 %call, 0
112
+ // OGCG-HOST: br i1 %tobool, label %kcall.end, label %kcall.configok
113
+ // OGCG-HOST: kcall.configok:
114
+ // OGCG-HOST: call void @_Z24__device_stub__global_fni(i32 noundef 1)
115
+ // OGCG-HOST: br label %kcall.end
116
+ // OGCG-HOST: kcall.end:
117
+ // OGCG-HOST: %{{[0-9]+}} = load i32, ptr %retval, align 4
118
+ // OGCG-HOST: ret i32
0 commit comments