22// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fno-ident -fcuda-is-device \
33// RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPTNONE %s
44
5+ // RUN: %clang_cc1 -O3 -triple amdgcn-amd-amdhsa -x hip -fno-ident -fcuda-is-device \
6+ // RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPT %s
7+
58#define __device__ __attribute__ ((device))
69#define __global__ __attribute__ ((global))
710
1013// OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
1114// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
1215// .
16+ // OPT: @__hip_cuid_ = addrspace(1) global i8 0
17+ // OPT: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
18+ // OPT: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
19+ // .
1320__device__ void extern_func();
1421
1522// OPTNONE: Function Attrs: convergent mustprogress noinline nounwind optnone
@@ -19,6 +26,13 @@ __device__ void extern_func();
1926// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3:[0-9]+]]
2027// OPTNONE-NEXT: ret void
2128//
29+ // OPT: Function Attrs: convergent mustprogress nounwind
30+ // OPT-LABEL: define {{[^@]+}}@_Z4funcv
31+ // OPT-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] {
32+ // OPT-NEXT: entry:
33+ // OPT-NEXT: tail call void @_Z11extern_funcv() #[[ATTR3:[0-9]+]]
34+ // OPT-NEXT: ret void
35+ //
2236__device__ void func () {
2337 extern_func ();
2438}
@@ -30,6 +44,13 @@ __device__ void func() {
3044// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3]]
3145// OPTNONE-NEXT: ret void
3246//
47+ // OPT: Function Attrs: convergent mustprogress norecurse nounwind
48+ // OPT-LABEL: define {{[^@]+}}@_Z6kernelv
49+ // OPT-SAME: () local_unnamed_addr #[[ATTR2:[0-9]+]] {
50+ // OPT-NEXT: entry:
51+ // OPT-NEXT: tail call void @_Z11extern_funcv() #[[ATTR3]]
52+ // OPT-NEXT: ret void
53+ //
3354__global__ void kernel () {
3455 extern_func ();
3556}
@@ -39,7 +60,16 @@ __global__ void kernel() {
3960// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
4061// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind }
4162// .
63+ // OPT: attributes #[[ATTR0]] = { convergent mustprogress nounwind "amdgpu-waves-per-eu"="4,10" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
64+ // OPT: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "amdgpu-waves-per-eu"="4,10" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
65+ // OPT: attributes #[[ATTR2]] = { convergent mustprogress norecurse nounwind "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
66+ // OPT: attributes #[[ATTR3]] = { convergent nounwind }
67+ // .
4268// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
4369// OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
4470// OPTNONE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
4571// .
72+ // OPT: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
73+ // OPT: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
74+ // OPT: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
75+ // .
0 commit comments