Skip to content

Commit 7609f76

Browse files
committed
[OpenMP] Ensure the actual kernel is annotated with launch bounds
1 parent c004067 commit 7609f76

File tree

6 files changed

+1560
-1548
lines changed

6 files changed

+1560
-1548
lines changed

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 27 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -639,27 +639,42 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
639639
// Build the argument list.
640640
bool NeedWrapperFunction =
641641
getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
642-
FunctionArgList Args;
643-
llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
644-
llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
642+
FunctionArgList Args, WrapperArgs;
643+
llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs,
644+
WrapperLocalAddrs;
645+
llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes,
646+
WrapperVLASizes;
645647
SmallString<256> Buffer;
646648
llvm::raw_svector_ostream Out(Buffer);
647649
Out << CapturedStmtInfo->getHelperName();
648-
if (NeedWrapperFunction)
650+
651+
CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
652+
llvm::Function *WrapperF = nullptr;
653+
if (NeedWrapperFunction) {
654+
// Emit the final kernel early to allow attributes to be added by the
655+
// OpenMP-IR-Builder.
656+
FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
657+
/*RegisterCastedArgsOnly=*/true,
658+
CapturedStmtInfo->getHelperName(), Loc);
659+
WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
660+
WrapperF =
661+
emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
662+
WrapperCGF.CXXThisValue, WrapperFO);
649663
Out << "_debug__";
664+
}
650665
FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
651666
Out.str(), Loc);
652-
llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
653-
VLASizes, CXXThisValue, FO);
667+
llvm::Function *F = emitOutlinedFunctionPrologue(
668+
*this, WrapperArgs, WrapperLocalAddrs, WrapperVLASizes, CXXThisValue, FO);
654669
CodeGenFunction::OMPPrivateScope LocalScope(*this);
655-
for (const auto &LocalAddrPair : LocalAddrs) {
670+
for (const auto &LocalAddrPair : WrapperLocalAddrs) {
656671
if (LocalAddrPair.second.first) {
657672
LocalScope.addPrivate(LocalAddrPair.second.first,
658673
LocalAddrPair.second.second);
659674
}
660675
}
661676
(void)LocalScope.Privatize();
662-
for (const auto &VLASizePair : VLASizes)
677+
for (const auto &VLASizePair : WrapperVLASizes)
663678
VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
664679
PGO.assignRegionCounters(GlobalDecl(CD), F);
665680
CapturedStmtInfo->EmitBody(*this, CD->getBody());
@@ -668,17 +683,10 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
668683
if (!NeedWrapperFunction)
669684
return F;
670685

671-
FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
672-
/*RegisterCastedArgsOnly=*/true,
673-
CapturedStmtInfo->getHelperName(), Loc);
674-
CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
675-
WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
676-
Args.clear();
677-
LocalAddrs.clear();
678-
VLASizes.clear();
679-
llvm::Function *WrapperF =
680-
emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
681-
WrapperCGF.CXXThisValue, WrapperFO);
686+
// Reverse the order: detach the wrapper and re-insert it right after F in the
// parent module's function list.
687+
WrapperF->removeFromParent();
688+
F->getParent()->getFunctionList().insertAfter(F->getIterator(), WrapperF);
689+
682690
llvm::SmallVector<llvm::Value *, 4> CallArgs;
683691
auto *PI = F->arg_begin();
684692
for (const auto *Arg : Args) {

0 commit comments

Comments
 (0)