Skip to content

Commit bd75628

Browse files
committed
[OpenMP][FIX] Enforce a function boundary for a new data environment
Whenever we enter a new OpenMP data environment, we want to enter a new function to simplify reasoning. Later we probably want to remove the entire specialization with respect to the if clause and pass the result to the runtime; for now, this should fix PR48686. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D94315
1 parent f19849a commit bd75628

File tree

3 files changed

+29
-15
lines changed

3 files changed

+29
-15
lines changed

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2098,6 +2098,14 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
20982098
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
20992099
OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
21002100
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2101+
2102+
// Ensure we do not inline the function. This is trivially true for the ones
2103+
// passed to __kmpc_fork_call but the ones called in serialized regions
2104+
// could be inlined. This is not perfect, but it is closer to the invariant
2105+
// we want, namely, every data environment starts with a new function.
2106+
// TODO: We should pass the if condition to the runtime function and do the
2107+
// handling there. Much cleaner code.
2108+
OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
21012109
RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
21022110

21032111
// __kmpc_end_serialized_parallel(&Loc, GTid);

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1576,11 +1576,6 @@ llvm::Function *CGOpenMPRuntimeGPU::emitParallelOutlinedFunction(
15761576
auto *OutlinedFun =
15771577
cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
15781578
D, ThreadIDVar, InnermostKind, CodeGen));
1579-
if (CGM.getLangOpts().Optimize) {
1580-
OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
1581-
OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
1582-
OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
1583-
}
15841579
IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
15851580
IsInTTDRegion = PrevIsInTTDRegion;
15861581
if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD &&
@@ -1698,11 +1693,6 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(
16981693
CodeGen.setAction(Action);
16991694
llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction(
17001695
D, ThreadIDVar, InnermostKind, CodeGen);
1701-
if (CGM.getLangOpts().Optimize) {
1702-
OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
1703-
OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
1704-
OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
1705-
}
17061696

17071697
return OutlinedFun;
17081698
}
@@ -2102,6 +2092,14 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall(
21022092
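// Give this function internal linkage. NOTE(review): the earlier "force inline" wording looks stale now that the outlined function is marked NoInline below — confirm.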
21032093
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
21042094

2095+
// Ensure we do not inline the function. This is trivially true for the ones
2096+
// passed to __kmpc_fork_call but the ones called in serialized regions
2097+
// could be inlined. This is not perfect, but it is closer to the invariant
2098+
// we want, namely, every data environment starts with a new function.
2099+
// TODO: We should pass the if condition to the runtime function and do the
2100+
// handling there. Much cleaner code.
2101+
cast<llvm::Function>(OutlinedFn)->addFnAttr(llvm::Attribute::NoInline);
2102+
21052103
Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
21062104
/*Name=*/".zero.addr");
21072105
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
@@ -3134,11 +3132,6 @@ static llvm::Function *emitShuffleAndReduceFunction(
31343132
"_omp_reduction_shuffle_and_reduce_func", &CGM.getModule());
31353133
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
31363134
Fn->setDoesNotRecurse();
3137-
if (CGM.getLangOpts().Optimize) {
3138-
Fn->removeFnAttr(llvm::Attribute::NoInline);
3139-
Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
3140-
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
3141-
}
31423135

31433136
CodeGenFunction CGF(CGM);
31443137
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

clang/test/OpenMP/parallel_if_codegen.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
88
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
99

10+
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple %itanium_abi_triple -emit-llvm %s -disable-O0-optnone -o - | FileCheck %s --check-prefix=WOPT
1011
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
1112
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
1213
// RUN: %clang_cc1 -fopenmp -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
@@ -96,14 +97,20 @@ int main() {
9697
return tmain(Arg);
9798
}
9899

100+
// WOPT: noinline
101+
// WOPT-NOT: optnone
99102
// CHECK: define internal {{.*}}void [[CAP_FN4]]
100103
// CHECK: call {{.*}}void @{{.+}}fn4
101104
// CHECK: ret void
102105

106+
// WOPT: noinline
107+
// WOPT-NOT: optnone
103108
// CHECK: define internal {{.*}}void [[CAP_FN5]]
104109
// CHECK: call {{.*}}void @{{.+}}fn5
105110
// CHECK: ret void
106111

112+
// WOPT: noinline
113+
// WOPT-NOT: optnone
107114
// CHECK: define internal {{.*}}void [[CAP_FN6]]
108115
// CHECK: call {{.*}}void @{{.+}}fn6
109116
// CHECK: ret void
@@ -129,14 +136,20 @@ int main() {
129136
// CHECK: br label %[[OMP_END]]
130137
// CHECK: [[OMP_END]]
131138

139+
// WOPT: noinline
140+
// WOPT-NOT: optnone
132141
// CHECK: define internal {{.*}}void [[CAP_FN1]]
133142
// CHECK: call {{.*}}void @{{.+}}fn1
134143
// CHECK: ret void
135144

145+
// WOPT: noinline
146+
// WOPT-NOT: optnone
136147
// CHECK: define internal {{.*}}void [[CAP_FN2]]
137148
// CHECK: call {{.*}}void @{{.+}}fn2
138149
// CHECK: ret void
139150

151+
// WOPT: noinline
152+
// WOPT-NOT: optnone
140153
// CHECK: define internal {{.*}}void [[CAP_FN3]]
141154
// CHECK: call {{.*}}void @{{.+}}fn3
142155
// CHECK: ret void

0 commit comments

Comments
 (0)