@@ -639,27 +639,42 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
639639 // Build the argument list.
640640 bool NeedWrapperFunction =
641641 getDebugInfo () && CGM.getCodeGenOpts ().hasReducedDebugInfo ();
642- FunctionArgList Args;
643- llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
644- llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
642+ FunctionArgList Args, WrapperArgs;
643+ llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs,
644+ WrapperLocalAddrs;
645+ llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes,
646+ WrapperVLASizes;
645647 SmallString<256 > Buffer;
646648 llvm::raw_svector_ostream Out (Buffer);
647649 Out << CapturedStmtInfo->getHelperName ();
648- if (NeedWrapperFunction)
650+
651+ CodeGenFunction WrapperCGF (CGM, /* suppressNewContext=*/ true );
652+ llvm::Function *WrapperF = nullptr ;
653+ if (NeedWrapperFunction) {
654+ // Emit the final kernel early to allow attributes to be added by the
655+ // OpenMPI-IR-Builder.
656+ FunctionOptions WrapperFO (&S, /* UIntPtrCastRequired=*/ true ,
657+ /* RegisterCastedArgsOnly=*/ true ,
658+ CapturedStmtInfo->getHelperName (), Loc);
659+ WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
660+ WrapperF =
661+ emitOutlinedFunctionPrologue (WrapperCGF, Args, LocalAddrs, VLASizes,
662+ WrapperCGF.CXXThisValue , WrapperFO);
649663 Out << " _debug__" ;
664+ }
650665 FunctionOptions FO (&S, !NeedWrapperFunction, /* RegisterCastedArgsOnly=*/ false ,
651666 Out.str (), Loc);
652- llvm::Function *F = emitOutlinedFunctionPrologue (* this , Args, LocalAddrs,
653- VLASizes , CXXThisValue, FO);
667+ llvm::Function *F = emitOutlinedFunctionPrologue (
668+ * this , WrapperArgs, WrapperLocalAddrs, WrapperVLASizes , CXXThisValue, FO);
654669 CodeGenFunction::OMPPrivateScope LocalScope (*this );
655- for (const auto &LocalAddrPair : LocalAddrs ) {
670+ for (const auto &LocalAddrPair : WrapperLocalAddrs ) {
656671 if (LocalAddrPair.second .first ) {
657672 LocalScope.addPrivate (LocalAddrPair.second .first ,
658673 LocalAddrPair.second .second );
659674 }
660675 }
661676 (void )LocalScope.Privatize ();
662- for (const auto &VLASizePair : VLASizes )
677+ for (const auto &VLASizePair : WrapperVLASizes )
663678 VLASizeMap[VLASizePair.second .first ] = VLASizePair.second .second ;
664679 PGO.assignRegionCounters (GlobalDecl (CD), F);
665680 CapturedStmtInfo->EmitBody (*this , CD->getBody ());
@@ -668,17 +683,10 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
668683 if (!NeedWrapperFunction)
669684 return F;
670685
671- FunctionOptions WrapperFO (&S, /* UIntPtrCastRequired=*/ true ,
672- /* RegisterCastedArgsOnly=*/ true ,
673- CapturedStmtInfo->getHelperName (), Loc);
674- CodeGenFunction WrapperCGF (CGM, /* suppressNewContext=*/ true );
675- WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
676- Args.clear ();
677- LocalAddrs.clear ();
678- VLASizes.clear ();
679- llvm::Function *WrapperF =
680- emitOutlinedFunctionPrologue (WrapperCGF, Args, LocalAddrs, VLASizes,
681- WrapperCGF.CXXThisValue , WrapperFO);
686+ // Reverse the order.
687+ WrapperF->removeFromParent ();
688+ F->getParent ()->getFunctionList ().insertAfter (F->getIterator (), WrapperF);
689+
682690 llvm::SmallVector<llvm::Value *, 4 > CallArgs;
683691 auto *PI = F->arg_begin ();
684692 for (const auto *Arg : Args) {
0 commit comments