Skip to content

Commit 6a7b5fe

Browse files
committed
Update codegen to make '#pragma omp target' illegal when compiling directly for the GPU
1 parent 3329b7a commit 6a7b5fe

File tree

5 files changed

+238
-4
lines changed

5 files changed

+238
-4
lines changed

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1309,6 +1309,19 @@ void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF,
13091309
Args);
13101310
}
13111311

1312+
void CGOpenMPRuntimeGPU::emitTargetCall(
1313+
CodeGenFunction &CGF, const OMPExecutableDirective &D,
1314+
llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
1315+
llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
1316+
llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
1317+
const OMPLoopDirective &D)>
1318+
SizeEmitter) {
1319+
SmallString<256> Buffer;
1320+
llvm::raw_svector_ostream Out(Buffer);
1321+
Out << "Cannot emit a '#pragma omp target' on the GPU";
1322+
CGM.Error(D.getBeginLoc(), Out.str());
1323+
}
1324+
13121325
void CGOpenMPRuntimeGPU::emitCriticalRegion(
13131326
CodeGenFunction &CGF, StringRef CriticalName,
13141327
const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,

clang/lib/CodeGen/CGOpenMPRuntimeGPU.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,16 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
248248
OpenMPDirectiveKind Kind, bool EmitChecks = true,
249249
bool ForceSimpleCall = false) override;
250250

251+
/// Emit the target offloading code associated with \a D. This is not
252+
/// supported by the GPU-side and simply returns an error.
253+
virtual void emitTargetCall(
254+
CodeGenFunction &CGF, const OMPExecutableDirective &D,
255+
llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
256+
llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
257+
llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
258+
const OMPLoopDirective &D)>
259+
SizeEmitter);
260+
251261
/// Emits a critical region.
252262
/// \param CriticalName Name of the critical region.
253263
/// \param CriticalOpGen Generator for the statement associated with the given

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6801,8 +6801,7 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
68016801
CodeGenModule &CGM = CGF.CGM;
68026802

68036803
// On device emit this construct as inlined code.
6804-
if (CGM.getLangOpts().OpenMPIsTargetDevice ||
6805-
CGM.getOpenMPRuntime().isGPU()) {
6804+
if (CGM.getLangOpts().OpenMPIsTargetDevice) {
68066805
OMPLexicalScope Scope(CGF, S, OMPD_target);
68076806
CGM.getOpenMPRuntime().emitInlinedDirective(
68086807
CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {

clang/test/OpenMP/gpu_target.cpp

Lines changed: 207 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,11 @@ int foo() {
3434
}
3535

3636
void bar() {
37-
#pragma omp target
38-
;
3937
#pragma omp parallel
4038
;
39+
#pragma omp parallel for
40+
for (int i = 0; i < 1; ++i)
41+
;
4142
}
4243

4344
void baz(int *p) {
@@ -56,6 +57,7 @@ int qux() {
5657
// AMDGCN: @c = addrspace(4) constant i32 0, align 4
5758
// AMDGCN: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
5859
// AMDGCN: @[[GLOB1:[0-9]+]] = private unnamed_addr addrspace(1) constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
60+
// AMDGCN: @[[GLOB2:[0-9]+]] = private unnamed_addr addrspace(1) constant %struct.ident_t { i32 0, i32 514, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
5961
// AMDGCN: @d = global i32 0, align 4
6062
// AMDGCN: @g = global i32 0, align 4
6163
// AMDGCN: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
@@ -65,6 +67,7 @@ int qux() {
6567
// NVPTX: @c = addrspace(4) constant i32 0, align 4
6668
// NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
6769
// NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
70+
// NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 514, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
6871
//.
6972
// AMDGCN-LABEL: define dso_local noundef i32 @_Z3foov(
7073
// AMDGCN-SAME: ) #[[ATTR0:[0-9]+]] {
@@ -82,9 +85,12 @@ int qux() {
8285
// AMDGCN-SAME: ) #[[ATTR0]] {
8386
// AMDGCN-NEXT: [[ENTRY:.*:]]
8487
// AMDGCN-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
88+
// AMDGCN-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
8589
// AMDGCN-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
8690
// AMDGCN-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
91+
// AMDGCN-NEXT: [[CAPTURED_VARS_ADDRS1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
8792
// AMDGCN-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3barv_omp_outlined, ptr @_Z3barv_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
93+
// AMDGCN-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3barv_omp_outlined.1, ptr @_Z3barv_omp_outlined.1_wrapper, ptr [[CAPTURED_VARS_ADDRS1_ASCAST]], i64 0)
8894
// AMDGCN-NEXT: ret void
8995
//
9096
//
@@ -119,6 +125,111 @@ int qux() {
119125
// AMDGCN-NEXT: ret void
120126
//
121127
//
128+
// AMDGCN-LABEL: define internal void @_Z3barv_omp_outlined.1(
129+
// AMDGCN-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
130+
// AMDGCN-NEXT: [[ENTRY:.*:]]
131+
// AMDGCN-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
132+
// AMDGCN-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
133+
// AMDGCN-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
134+
// AMDGCN-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
135+
// AMDGCN-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
136+
// AMDGCN-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
137+
// AMDGCN-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
138+
// AMDGCN-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
139+
// AMDGCN-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
140+
// AMDGCN-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
141+
// AMDGCN-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
142+
// AMDGCN-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
143+
// AMDGCN-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
144+
// AMDGCN-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
145+
// AMDGCN-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
146+
// AMDGCN-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
147+
// AMDGCN-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
148+
// AMDGCN-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
149+
// AMDGCN-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
150+
// AMDGCN-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
151+
// AMDGCN-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
152+
// AMDGCN-NEXT: store i32 0, ptr [[DOTOMP_UB_ASCAST]], align 4
153+
// AMDGCN-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
154+
// AMDGCN-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
155+
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
156+
// AMDGCN-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
157+
// AMDGCN-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
158+
// AMDGCN-NEXT: br label %[[OMP_DISPATCH_COND:.*]]
159+
// AMDGCN: [[OMP_DISPATCH_COND]]:
160+
// AMDGCN-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
161+
// AMDGCN-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 0
162+
// AMDGCN-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
163+
// AMDGCN: [[COND_TRUE]]:
164+
// AMDGCN-NEXT: br label %[[COND_END:.*]]
165+
// AMDGCN: [[COND_FALSE]]:
166+
// AMDGCN-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
167+
// AMDGCN-NEXT: br label %[[COND_END]]
168+
// AMDGCN: [[COND_END]]:
169+
// AMDGCN-NEXT: [[COND:%.*]] = phi i32 [ 0, %[[COND_TRUE]] ], [ [[TMP3]], %[[COND_FALSE]] ]
170+
// AMDGCN-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
171+
// AMDGCN-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
172+
// AMDGCN-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
173+
// AMDGCN-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
174+
// AMDGCN-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
175+
// AMDGCN-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
176+
// AMDGCN-NEXT: br i1 [[CMP1]], label %[[OMP_DISPATCH_BODY:.*]], label %[[OMP_DISPATCH_END:.*]]
177+
// AMDGCN: [[OMP_DISPATCH_BODY]]:
178+
// AMDGCN-NEXT: br label %[[OMP_INNER_FOR_COND:.*]]
179+
// AMDGCN: [[OMP_INNER_FOR_COND]]:
180+
// AMDGCN-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
181+
// AMDGCN-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
182+
// AMDGCN-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
183+
// AMDGCN-NEXT: br i1 [[CMP2]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
184+
// AMDGCN: [[OMP_INNER_FOR_BODY]]:
185+
// AMDGCN-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
186+
// AMDGCN-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
187+
// AMDGCN-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
188+
// AMDGCN-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
189+
// AMDGCN-NEXT: br label %[[OMP_BODY_CONTINUE:.*]]
190+
// AMDGCN: [[OMP_BODY_CONTINUE]]:
191+
// AMDGCN-NEXT: br label %[[OMP_INNER_FOR_INC:.*]]
192+
// AMDGCN: [[OMP_INNER_FOR_INC]]:
193+
// AMDGCN-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
194+
// AMDGCN-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
195+
// AMDGCN-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
196+
// AMDGCN-NEXT: br label %[[OMP_INNER_FOR_COND]]
197+
// AMDGCN: [[OMP_INNER_FOR_END]]:
198+
// AMDGCN-NEXT: br label %[[OMP_DISPATCH_INC:.*]]
199+
// AMDGCN: [[OMP_DISPATCH_INC]]:
200+
// AMDGCN-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
201+
// AMDGCN-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
202+
// AMDGCN-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
203+
// AMDGCN-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
204+
// AMDGCN-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
205+
// AMDGCN-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
206+
// AMDGCN-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
207+
// AMDGCN-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
208+
// AMDGCN-NEXT: br label %[[OMP_DISPATCH_COND]]
209+
// AMDGCN: [[OMP_DISPATCH_END]]:
210+
// AMDGCN-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP1]])
211+
// AMDGCN-NEXT: ret void
212+
//
213+
//
214+
// AMDGCN-LABEL: define internal void @_Z3barv_omp_outlined.1_wrapper(
215+
// AMDGCN-SAME: i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
216+
// AMDGCN-NEXT: [[ENTRY:.*:]]
217+
// AMDGCN-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
218+
// AMDGCN-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
219+
// AMDGCN-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
220+
// AMDGCN-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
221+
// AMDGCN-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
222+
// AMDGCN-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
223+
// AMDGCN-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
224+
// AMDGCN-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
225+
// AMDGCN-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
226+
// AMDGCN-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
227+
// AMDGCN-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
228+
// AMDGCN-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
229+
// AMDGCN-NEXT: call void @_Z3barv_omp_outlined.1(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR3]]
230+
// AMDGCN-NEXT: ret void
231+
//
232+
//
122233
// AMDGCN-LABEL: define dso_local void @_Z3bazPi(
123234
// AMDGCN-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
124235
// AMDGCN-NEXT: [[ENTRY:.*:]]
@@ -151,8 +262,10 @@ int qux() {
151262
// NVPTX-SAME: ) #[[ATTR0]] {
152263
// NVPTX-NEXT: [[ENTRY:.*:]]
153264
// NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
265+
// NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
154266
// NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
155267
// NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3barv_omp_outlined, ptr @_Z3barv_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
268+
// NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3barv_omp_outlined1, ptr @_Z3barv_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
156269
// NVPTX-NEXT: ret void
157270
//
158271
//
@@ -181,6 +294,98 @@ int qux() {
181294
// NVPTX-NEXT: ret void
182295
//
183296
//
297+
// NVPTX-LABEL: define internal void @_Z3barv_omp_outlined1(
298+
// NVPTX-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
299+
// NVPTX-NEXT: [[ENTRY:.*:]]
300+
// NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
301+
// NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
302+
// NVPTX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
303+
// NVPTX-NEXT: [[TMP:%.*]] = alloca i32, align 4
304+
// NVPTX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
305+
// NVPTX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
306+
// NVPTX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
307+
// NVPTX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
308+
// NVPTX-NEXT: [[I:%.*]] = alloca i32, align 4
309+
// NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
310+
// NVPTX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
311+
// NVPTX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
312+
// NVPTX-NEXT: store i32 0, ptr [[DOTOMP_UB]], align 4
313+
// NVPTX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
314+
// NVPTX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
315+
// NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
316+
// NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
317+
// NVPTX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
318+
// NVPTX-NEXT: br label %[[OMP_DISPATCH_COND:.*]]
319+
// NVPTX: [[OMP_DISPATCH_COND]]:
320+
// NVPTX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
321+
// NVPTX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 0
322+
// NVPTX-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
323+
// NVPTX: [[COND_TRUE]]:
324+
// NVPTX-NEXT: br label %[[COND_END:.*]]
325+
// NVPTX: [[COND_FALSE]]:
326+
// NVPTX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
327+
// NVPTX-NEXT: br label %[[COND_END]]
328+
// NVPTX: [[COND_END]]:
329+
// NVPTX-NEXT: [[COND:%.*]] = phi i32 [ 0, %[[COND_TRUE]] ], [ [[TMP3]], %[[COND_FALSE]] ]
330+
// NVPTX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
331+
// NVPTX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
332+
// NVPTX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
333+
// NVPTX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
334+
// NVPTX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
335+
// NVPTX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
336+
// NVPTX-NEXT: br i1 [[CMP1]], label %[[OMP_DISPATCH_BODY:.*]], label %[[OMP_DISPATCH_END:.*]]
337+
// NVPTX: [[OMP_DISPATCH_BODY]]:
338+
// NVPTX-NEXT: br label %[[OMP_INNER_FOR_COND:.*]]
339+
// NVPTX: [[OMP_INNER_FOR_COND]]:
340+
// NVPTX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
341+
// NVPTX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
342+
// NVPTX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
343+
// NVPTX-NEXT: br i1 [[CMP2]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
344+
// NVPTX: [[OMP_INNER_FOR_BODY]]:
345+
// NVPTX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
346+
// NVPTX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
347+
// NVPTX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
348+
// NVPTX-NEXT: store i32 [[ADD]], ptr [[I]], align 4
349+
// NVPTX-NEXT: br label %[[OMP_BODY_CONTINUE:.*]]
350+
// NVPTX: [[OMP_BODY_CONTINUE]]:
351+
// NVPTX-NEXT: br label %[[OMP_INNER_FOR_INC:.*]]
352+
// NVPTX: [[OMP_INNER_FOR_INC]]:
353+
// NVPTX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
354+
// NVPTX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
355+
// NVPTX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
356+
// NVPTX-NEXT: br label %[[OMP_INNER_FOR_COND]]
357+
// NVPTX: [[OMP_INNER_FOR_END]]:
358+
// NVPTX-NEXT: br label %[[OMP_DISPATCH_INC:.*]]
359+
// NVPTX: [[OMP_DISPATCH_INC]]:
360+
// NVPTX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
361+
// NVPTX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
362+
// NVPTX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
363+
// NVPTX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
364+
// NVPTX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
365+
// NVPTX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
366+
// NVPTX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
367+
// NVPTX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
368+
// NVPTX-NEXT: br label %[[OMP_DISPATCH_COND]]
369+
// NVPTX: [[OMP_DISPATCH_END]]:
370+
// NVPTX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
371+
// NVPTX-NEXT: ret void
372+
//
373+
//
374+
// NVPTX-LABEL: define internal void @_Z3barv_omp_outlined1_wrapper(
375+
// NVPTX-SAME: i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
376+
// NVPTX-NEXT: [[ENTRY:.*:]]
377+
// NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
378+
// NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
379+
// NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
380+
// NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
381+
// NVPTX-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
382+
// NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
383+
// NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
384+
// NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
385+
// NVPTX-NEXT: call void @_Z3barv_omp_outlined1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
386+
// NVPTX-NEXT: ret void
387+
//
388+
//
184389
// NVPTX-LABEL: define dso_local void @_Z3bazPi(
185390
// NVPTX-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
186391
// NVPTX-NEXT: [[ENTRY:.*:]]
clang/test/OpenMP/(filename missing from this page capture — presumably the new -verify test file; confirm against the commit)

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple amdgcn-amd-amdhsa -emit-llvm %s
2+
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-nvidia-cuda -emit-llvm %s
3+
4+
void foo() {
5+
#pragma omp target // expected-error {{Cannot emit a '#pragma omp target' on the GPU}}
6+
;
7+
}

0 commit comments

Comments
 (0)