// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --version 5
// expected-no-diagnostics

// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=AMDGCN
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=NVPTX
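// This file exercises '#pragma omp allocate' on global, constant, and local
// variables with a range of allocator handles, together with 'target',
// 'parallel', and 'atomic' constructs, compiled directly for the amdgcn and
// nvptx64 triples.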

typedef enum omp_allocator_handle_t {
  omp_null_allocator = 0,
  omp_default_mem_alloc = 1,
  omp_large_cap_mem_alloc = 2,
  omp_const_mem_alloc = 3,
  omp_high_bw_mem_alloc = 4,
  omp_low_lat_mem_alloc = 5,
  omp_cgroup_mem_alloc = 6,
  omp_pteam_mem_alloc = 7,
  omp_thread_mem_alloc = 8,
  KMP_ALLOCATOR_MAX_HANDLE = ~(0LU)
} omp_allocator_handle_t;

int d = 0;
#pragma omp allocate(d) allocator(omp_default_mem_alloc)

int g = 0;
#pragma omp allocate(g) allocator(omp_cgroup_mem_alloc)

extern const int c = 0;
#pragma omp allocate(c) allocator(omp_const_mem_alloc)


int foo() {
  int t = 0;
#pragma omp allocate(t) allocator(omp_thread_mem_alloc)
  return t;
}

void bar() {
#pragma omp target
  ;
#pragma omp parallel
  ;
}

void baz(int *p) {
#pragma omp atomic
  *p += 1;
}

int qux() {
#if defined(__NVPTX__)
  return 1;
#elif defined(__AMDGPU__)
  return 2;
#endif
}
//.
// AMDGCN: @c = addrspace(4) constant i32 0, align 4
// AMDGCN: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
// AMDGCN: @[[GLOB1:[0-9]+]] = private unnamed_addr addrspace(1) constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
// AMDGCN: @d = global i32 0, align 4
// AMDGCN: @g = global i32 0, align 4
// AMDGCN: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
//.
// NVPTX: @d = global i32 0, align 4
// NVPTX: @g = global i32 0, align 4
// NVPTX: @c = addrspace(4) constant i32 0, align 4
// NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
// NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
//.
// AMDGCN-LABEL: define dso_local noundef i32 @_Z3foov(
// AMDGCN-SAME: ) #[[ATTR0:[0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[T:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN-NEXT: [[T_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[T]] to ptr
// AMDGCN-NEXT: store i32 0, ptr [[T_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_ASCAST]], align 4
// AMDGCN-NEXT: ret i32 [[TMP0]]
//
//
// AMDGCN-LABEL: define dso_local void @_Z3barv(
// AMDGCN-SAME: ) #[[ATTR0]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
// AMDGCN-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
// AMDGCN-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
// AMDGCN-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3barv_omp_outlined, ptr @_Z3barv_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// AMDGCN-NEXT: ret void
//
//
// AMDGCN-LABEL: define internal void @_Z3barv_omp_outlined(
// AMDGCN-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
// AMDGCN-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
// AMDGCN-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// AMDGCN-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
// AMDGCN-NEXT: ret void
//
//
// AMDGCN-LABEL: define internal void @_Z3barv_omp_outlined_wrapper(
// AMDGCN-SAME: i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
// AMDGCN-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
// AMDGCN-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
// AMDGCN-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
// AMDGCN-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
// AMDGCN-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
// AMDGCN-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
// AMDGCN-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
// AMDGCN-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
// AMDGCN-NEXT: call void @_Z3barv_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR3:[0-9]+]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN-LABEL: define dso_local void @_Z3bazPi(
// AMDGCN-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
// AMDGCN-NEXT: store ptr [[P]], ptr [[P_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 monotonic, align 4
// AMDGCN-NEXT: ret void
//
//
// AMDGCN-LABEL: define dso_local noundef i32 @_Z3quxv(
// AMDGCN-SAME: ) #[[ATTR0]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN-NEXT: ret i32 2
//
//
// NVPTX-LABEL: define dso_local noundef i32 @_Z3foov(
// NVPTX-SAME: ) #[[ATTR0:[0-9]+]] {
// NVPTX-NEXT: [[ENTRY:.*:]]
// NVPTX-NEXT: [[T:%.*]] = alloca i32, align 4
// NVPTX-NEXT: store i32 0, ptr [[T]], align 4
// NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[T]], align 4
// NVPTX-NEXT: ret i32 [[TMP0]]
//
//
// NVPTX-LABEL: define dso_local void @_Z3barv(
// NVPTX-SAME: ) #[[ATTR0]] {
// NVPTX-NEXT: [[ENTRY:.*:]]
// NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
// NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3barv_omp_outlined, ptr @_Z3barv_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
// NVPTX-NEXT: ret void
//
//
// NVPTX-LABEL: define internal void @_Z3barv_omp_outlined(
// NVPTX-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// NVPTX-NEXT: [[ENTRY:.*:]]
// NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// NVPTX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// NVPTX-NEXT: ret void
//
//
// NVPTX-LABEL: define internal void @_Z3barv_omp_outlined_wrapper(
// NVPTX-SAME: i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
// NVPTX-NEXT: [[ENTRY:.*:]]
// NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
// NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
// NVPTX-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
// NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// NVPTX-NEXT: call void @_Z3barv_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]]
// NVPTX-NEXT: ret void
//
//
// NVPTX-LABEL: define dso_local void @_Z3bazPi(
// NVPTX-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
// NVPTX-NEXT: [[ENTRY:.*:]]
// NVPTX-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8
// NVPTX-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8
// NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
// NVPTX-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 monotonic, align 4
// NVPTX-NEXT: ret void
//
//
// NVPTX-LABEL: define dso_local noundef i32 @_Z3quxv(
// NVPTX-SAME: ) #[[ATTR0]] {
// NVPTX-NEXT: [[ENTRY:.*:]]
// NVPTX-NEXT: ret i32 1
//
//.
// AMDGCN: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// AMDGCN: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// AMDGCN: attributes #[[ATTR2]] = { convergent noinline norecurse nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// AMDGCN: attributes #[[ATTR3]] = { nounwind }
// AMDGCN: attributes #[[ATTR4:[0-9]+]] = { alwaysinline }
//.
// NVPTX: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" }
// NVPTX: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" }
// NVPTX: attributes #[[ATTR2]] = { convergent noinline norecurse nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" }
// NVPTX: attributes #[[ATTR3]] = { nounwind }
// NVPTX: attributes #[[ATTR4:[0-9]+]] = { alwaysinline }
//.
// AMDGCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// AMDGCN: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// AMDGCN: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 45}
// AMDGCN: [[META3:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
// NVPTX: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// NVPTX: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 45}
// NVPTX: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.