diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 8cda583313ca4..6277ecd8d12ab 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -10923,8 +10923,8 @@ void CGOpenMPRuntime::emitTargetDataCalls( llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP); llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(OMPBuilder.createTargetData( - OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB, - CustomMapperCB, + OmpLoc, AllocaIP, CodeGenIP, /*DeallocIPs=*/{}, DeviceID, IfCondVal, + Info, GenMapInfoCB, CustomMapperCB, /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc)); CGF.Builder.restoreIP(AfterIP); } diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index efc06a276267a..ecc801ca971c7 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1886,10 +1886,10 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); - auto BodyGenCB = [&, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { + auto BodyGenCB = [&, this](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( - *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel"); + *this, ParallelRegionBodyStmt, AllocIP, CodeGenIP, "parallel"); return llvm::Error::success(); }; @@ -1897,9 +1897,10 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); - llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail( - OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, - IfCond, NumThreads, ProcBind, 
S.hasCancel())); + llvm::OpenMPIRBuilder::InsertPointTy AfterIP = + cantFail(OMPBuilder.createParallel( + Builder, AllocaIP, /*DeallocIPs=*/{}, BodyGenCB, PrivCB, FiniCB, + IfCond, NumThreads, ProcBind, S.hasCancel())); Builder.restoreIP(AfterIP); return; } @@ -4427,21 +4428,23 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { llvm::SmallVector SectionCBVector; if (CS) { for (const Stmt *SubStmt : CS->children()) { - auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { - OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( - *this, SubStmt, AllocaIP, CodeGenIP, "section"); + auto SectionCB = [this, SubStmt](InsertPointTy AllocIP, + InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { + OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(*this, SubStmt, AllocIP, + CodeGenIP, "section"); return llvm::Error::success(); }; SectionCBVector.push_back(SectionCB); } } else { - auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { - OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( - *this, CapturedStmt, AllocaIP, CodeGenIP, "section"); - return llvm::Error::success(); - }; + auto SectionCB = + [this, CapturedStmt](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { + OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( + *this, CapturedStmt, AllocIP, CodeGenIP, "section"); + return llvm::Error::success(); + }; SectionCBVector.push_back(SectionCB); } @@ -4495,10 +4498,11 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { return llvm::Error::success(); }; - auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { + auto BodyGenCB = [SectionRegionBodyStmt, + this](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( - *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section"); + *this, SectionRegionBodyStmt, AllocIP, CodeGenIP, 
"section"); return llvm::Error::success(); }; @@ -4580,10 +4584,11 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { return llvm::Error::success(); }; - auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { + auto BodyGenCB = [MasterRegionBodyStmt, + this](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( - *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master"); + *this, MasterRegionBodyStmt, AllocIP, CodeGenIP, "master"); return llvm::Error::success(); }; @@ -4630,10 +4635,11 @@ void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { return llvm::Error::success(); }; - auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { + auto BodyGenCB = [MaskedRegionBodyStmt, + this](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( - *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked"); + *this, MaskedRegionBodyStmt, AllocIP, CodeGenIP, "masked"); return llvm::Error::success(); }; @@ -4673,10 +4679,11 @@ void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { return llvm::Error::success(); }; - auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { + auto BodyGenCB = [CriticalRegionBodyStmt, + this](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( - *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical"); + *this, CriticalRegionBodyStmt, AllocIP, CodeGenIP, "critical"); return llvm::Error::success(); }; @@ -5643,8 +5650,8 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( InsertPointTy AllocaIP(AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); - auto BodyGenCB = [&, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { + auto 
BodyGenCB = [&, this](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { Builder.restoreIP(CodeGenIP); EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); return llvm::Error::success(); @@ -5653,7 +5660,8 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( if (!CapturedStmtInfo) CapturedStmtInfo = &CapStmtInfo; llvm::OpenMPIRBuilder::InsertPointTy AfterIP = - cantFail(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB)); + cantFail(OMPBuilder.createTaskgroup(Builder, AllocaIP, + /*DeallocIPs=*/{}, BodyGenCB)); Builder.restoreIP(AfterIP); return; } @@ -6233,8 +6241,9 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { return llvm::Error::success(); }; - auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { + auto BodyGenCB = [&S, C, this](InsertPointTy AllocIP, + InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { Builder.restoreIP(CodeGenIP); const CapturedStmt *CS = S.getInnermostCapturedStmt(); @@ -6251,7 +6260,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { OutlinedFn, CapturedVars); } else { OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( - *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered"); + *this, CS->getCapturedStmt(), AllocIP, CodeGenIP, "ordered"); } return llvm::Error::success(); }; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 0b8f22719faf1..02d61c1a3626a 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -612,17 +612,19 @@ class OpenMPIRBuilder { /// such InsertPoints need to be preserved, it can split the block itself /// before calling the callback. /// - /// AllocaIP and CodeGenIP must not point to the same position. - /// - /// \param AllocaIP is the insertion point at which new alloca instructions - /// should be placed. 
The BasicBlock it is pointing to must - /// not be split. - /// \param CodeGenIP is the insertion point at which the body code should be - /// placed. - /// + /// AllocIP and CodeGenIP must not point to the same position. + /// + /// \param AllocIP is the insertion point at which new allocations should + /// be placed. The BasicBlock it is pointing to must not be + /// split. + /// \param CodeGenIP is the insertion point at which the body code should be + /// placed. + /// \param DeallocIPs is the list of insertion points where explicit + /// deallocations, if needed, should be placed. /// \return an error, if any were triggered during execution. using BodyGenCallbackTy = - function_ref; + function_ref DeallocIPs)>; // This is created primarily for sections construct as llvm::function_ref // (BodyGenCallbackTy) is not storable (as described in the comments of @@ -631,7 +633,8 @@ class OpenMPIRBuilder { /// /// \return an error, if any were triggered during execution. using StorableBodyGenCallbackTy = - std::function; + std::function DeallocIPs)>; /// Callback type for loop body code generation. /// @@ -725,7 +728,9 @@ class OpenMPIRBuilder { /// Generator for '#omp parallel' /// /// \param Loc The insert and source location description. - /// \param AllocaIP The insertion points to be used for alloca instructions. + /// \param AllocIP The insertion point to be used for allocations. + /// \param DeallocIPs The insertion points to be used for explicit + /// deallocations, if needed. /// \param BodyGenCB Callback that will generate the region code. /// \param PrivCB Callback to copy a given variable (think copy constructor). /// \param FiniCB Callback to finalize variable copies. @@ -736,10 +741,10 @@ class OpenMPIRBuilder { /// /// \returns The insertion position *after* the parallel. 
LLVM_ABI InsertPointOrErrorTy createParallel( - const LocationDescription &Loc, InsertPointTy AllocaIP, - BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, - FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, - omp::ProcBindKind ProcBind, bool IsCancellable); + const LocationDescription &Loc, InsertPointTy AllocIP, + ArrayRef DeallocIPs, BodyGenCallbackTy BodyGenCB, + PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, + Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable); /// Generator for the control flow structure of an OpenMP canonical loop. /// @@ -1363,7 +1368,9 @@ class OpenMPIRBuilder { /// Generator for `#omp task` /// /// \param Loc The location where the task construct was encountered. - /// \param AllocaIP The insertion point to be used for alloca instructions. + /// \param AllocIP The insertion point to be used for allocations. + /// \param DeallocIPs The insertion points to be used for explicit + /// deallocations, if needed. /// \param BodyGenCB Callback that will generate the region code. /// \param Tied True if the task is tied, false if the task is untied. 
/// \param Final i1 value which is `true` if the task is final, `false` if the @@ -1379,21 +1386,23 @@ class OpenMPIRBuilder { /// \param Mergeable If the given task is `mergeable` /// \param priority `priority-value' specifies the execution order of the /// tasks that is generated by the construct - LLVM_ABI InsertPointOrErrorTy - createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, - BodyGenCallbackTy BodyGenCB, bool Tied = true, - Value *Final = nullptr, Value *IfCondition = nullptr, - SmallVector Dependencies = {}, bool Mergeable = false, - Value *EventHandle = nullptr, Value *Priority = nullptr); + LLVM_ABI InsertPointOrErrorTy createTask( + const LocationDescription &Loc, InsertPointTy AllocIP, + ArrayRef DeallocIPs, BodyGenCallbackTy BodyGenCB, + bool Tied = true, Value *Final = nullptr, Value *IfCondition = nullptr, + SmallVector Dependencies = {}, bool Mergeable = false, + Value *EventHandle = nullptr, Value *Priority = nullptr); /// Generator for the taskgroup construct /// /// \param Loc The location where the taskgroup construct was encountered. - /// \param AllocaIP The insertion point to be used for alloca instructions. + /// \param AllocIP The insertion point to be used for allocations. + /// \param DeallocIPs The insertion points to be used for explicit + /// deallocations, if needed. /// \param BodyGenCB Callback that will generate the region code. 
- LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, - InsertPointTy AllocaIP, - BodyGenCallbackTy BodyGenCB); + LLVM_ABI InsertPointOrErrorTy createTaskgroup( + const LocationDescription &Loc, InsertPointTy AllocIP, + ArrayRef DeallocIPs, BodyGenCallbackTy BodyGenCB); using FileIdentifierInfoCallbackTy = std::function()>; @@ -2262,7 +2271,8 @@ class OpenMPIRBuilder { struct OutlineInfo { using PostOutlineCBTy = std::function; PostOutlineCBTy PostOutlineCB; - BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB; + BasicBlock *EntryBB, *ExitBB, *OuterAllocBB; + SmallVector OuterDeallocBBs; SmallVector ExcludeArgsFromAggregate; LLVM_ABI virtual ~OutlineInfo() = default; @@ -2335,7 +2345,8 @@ class OpenMPIRBuilder { /// \return an error, if any were triggered during execution. LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, - InsertPointTy AllocaIP = {}); + InsertPointTy AllocIP = {}, + ArrayRef DeallocIPs = {}); /// Create the global variable holding the offload mappings information. LLVM_ABI GlobalVariable * @@ -2891,11 +2902,13 @@ class OpenMPIRBuilder { /// Generator for `#omp distribute` /// /// \param Loc The location where the distribute construct was encountered. - /// \param AllocaIP The insertion points to be used for alloca instructions. + /// \param AllocIP The insertion point to be used for allocations. + /// \param DeallocIPs The insertion points to be used for explicit + /// deallocations, if needed. /// \param BodyGenCB Callback that will generate the region code. 
- LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, - InsertPointTy AllocaIP, - BodyGenCallbackTy BodyGenCB); + LLVM_ABI InsertPointOrErrorTy createDistribute( + const LocationDescription &Loc, InsertPointTy AllocIP, + ArrayRef DeallocIPs, BodyGenCallbackTy BodyGenCB); /// Generate conditional branch and relevant BasicBlocks through which private /// threads copy the 'copyin' variables from Master copy to threadprivate @@ -3223,9 +3236,11 @@ class OpenMPIRBuilder { /// Generator for '#omp target data' /// /// \param Loc The location where the target data construct was encountered. - /// \param AllocaIP The insertion points to be used for alloca instructions. + /// \param AllocIP The insertion point to be used for allocations. /// \param CodeGenIP The insertion point at which the target directive code /// should be placed. + /// \param DeallocIPs The insertion points at which explicit deallocations + /// should be placed, if needed. /// \param IsBegin If true then emits begin mapper call otherwise emits /// end mapper call. /// \param DeviceID Stores the DeviceID from the device clause. @@ -3238,10 +3253,10 @@ class OpenMPIRBuilder { /// \param DeviceAddrCB Optional callback to generate code related to /// use_device_ptr and use_device_addr. 
LLVM_ABI InsertPointOrErrorTy createTargetData( - const LocationDescription &Loc, InsertPointTy AllocaIP, - InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, - TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, - CustomMapperCallbackTy CustomMapperCB, + const LocationDescription &Loc, InsertPointTy AllocIP, + InsertPointTy CodeGenIP, ArrayRef DeallocIPs, + Value *DeviceID, Value *IfCond, TargetDataInfo &Info, + GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc = nullptr, function_ref @@ -3250,7 +3265,8 @@ class OpenMPIRBuilder { Value *SrcLocInfo = nullptr); using TargetBodyGenCallbackTy = function_ref; + InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs)>; using TargetGenArgAccessorsCallbackTy = function_ref DeallocIPs, TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index b3bea96039172..7b1e3a759470f 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -100,13 +100,13 @@ class CodeExtractorAnalysisCache { /// will be placed in the entry block of the function. BasicBlock *AllocationBlock; - /// A block outside of the extraction set where deallocations for - /// intermediate allocations can be placed inside. Not used for - /// automatically deallocated memory (e.g. `alloca`), which is the default. + /// A set of blocks outside of the extraction set where deallocations for + /// intermediate allocations should be placed. Not used for automatically + /// deallocated memory (e.g. `alloca`), which is the default. /// - /// If it is null and needed, the end of the replacement basic block will be - /// used to place deallocations. 
- BasicBlock *DeallocationBlock; + /// If it is empty and needed, the end of the replacement basic block will + /// be used to place deallocations. + SmallVector DeallocationBlocks; /// If true, varargs functions can be extracted. bool AllowVarArgs; @@ -156,11 +156,11 @@ class CodeExtractorAnalysisCache { /// Any new allocations will be placed in the AllocationBlock, unless /// it is null, in which case it will be placed in the entry block of /// the function from which the code is being extracted. Explicit - /// deallocations for the aforementioned allocations will be placed in the - /// DeallocationBlock or the end of the replacement block, if needed. - /// If ArgsInZeroAddressSpace param is set to true, then the aggregate - /// param pointer of the outlined function is declared in zero address - /// space. + /// deallocations for the aforementioned allocations will be placed, if + /// needed, in all blocks in DeallocationBlocks or the end of the + /// replacement block. If ArgsInZeroAddressSpace param is set to true, then + /// the aggregate param pointer of the outlined function is declared in zero + /// address space. 
LLVM_ABI CodeExtractor(ArrayRef BBs, DominatorTree *DT = nullptr, bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, @@ -168,7 +168,7 @@ class CodeExtractorAnalysisCache { AssumptionCache *AC = nullptr, bool AllowVarArgs = false, bool AllowAlloca = false, BasicBlock *AllocationBlock = nullptr, - BasicBlock *DeallocationBlock = nullptr, + ArrayRef DeallocationBlocks = {}, std::string Suffix = "", bool ArgsInZeroAddressSpace = false); LLVM_ABI virtual ~CodeExtractor() = default; diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 26cb529c92e5d..bd483aa2c5e02 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -488,10 +488,10 @@ class OMPCodeExtractor : public CodeExtractor { AssumptionCache *AC = nullptr, bool AllowVarArgs = false, bool AllowAlloca = false, BasicBlock *AllocationBlock = nullptr, - BasicBlock *DeallocationBlock = nullptr, + ArrayRef DeallocationBlocks = {}, std::string Suffix = "", bool ArgsInZeroAddressSpace = false) : CodeExtractor(BBs, DT, AggregateArgs, BFI, BPI, AC, AllowVarArgs, - AllowAlloca, AllocationBlock, DeallocationBlock, Suffix, + AllowAlloca, AllocationBlock, DeallocationBlocks, Suffix, ArgsInZeroAddressSpace), OMPBuilder(OMPBuilder) {} @@ -503,32 +503,16 @@ class OMPCodeExtractor : public CodeExtractor { class DeviceSharedMemCodeExtractor : public OMPCodeExtractor { public: - DeviceSharedMemCodeExtractor( - OpenMPIRBuilder &OMPBuilder, BasicBlock *AllocBlockOverride, - ArrayRef BBs, DominatorTree *DT = nullptr, - bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, - BranchProbabilityInfo *BPI = nullptr, AssumptionCache *AC = nullptr, - bool AllowVarArgs = false, bool AllowAlloca = false, - BasicBlock *AllocationBlock = nullptr, - BasicBlock *DeallocationBlock = nullptr, std::string Suffix = "", - bool ArgsInZeroAddressSpace = false) - : OMPCodeExtractor(OMPBuilder, BBs, DT, AggregateArgs, BFI, BPI, AC, - 
AllowVarArgs, AllowAlloca, AllocationBlock, - DeallocationBlock, Suffix, ArgsInZeroAddressSpace), - AllocBlockOverride(AllocBlockOverride) {} + using OMPCodeExtractor::OMPCodeExtractor; virtual ~DeviceSharedMemCodeExtractor() = default; protected: virtual Instruction * - allocateVar(BasicBlock *, BasicBlock::iterator, Type *VarType, + allocateVar(BasicBlock *BB, BasicBlock::iterator AllocIP, Type *VarType, const Twine &Name = Twine(""), AddrSpaceCastInst **CastedAlloc = nullptr) override { - // Ignore the CastedAlloc pointer, if requested, because shared memory - // should not be casted to address space 0 to be passed around. return OMPBuilder.createOMPAllocShared( - OpenMPIRBuilder::InsertPointTy( - AllocBlockOverride, AllocBlockOverride->getFirstInsertionPt()), - VarType, Name); + OpenMPIRBuilder::InsertPointTy(BB, AllocIP), VarType, Name); } virtual Instruction *deallocateVar(BasicBlock *BB, @@ -537,19 +521,12 @@ class DeviceSharedMemCodeExtractor : public OMPCodeExtractor { return OMPBuilder.createOMPFreeShared( OpenMPIRBuilder::InsertPointTy(BB, DeallocIP), Var, VarType); } - -private: - // TODO: Remove the need for this override and instead get the CodeExtractor - // to provide a valid insert point for explicit deallocations by correctly - // populating its DeallocationBlock. - BasicBlock *AllocBlockOverride; }; /// Helper storing information about regions to outline using device shared /// memory for intermediate allocations. 
struct DeviceSharedMemOutlineInfo : public OpenMPIRBuilder::OutlineInfo { OpenMPIRBuilder &OMPBuilder; - BasicBlock *AllocBlockOverride = nullptr; DeviceSharedMemOutlineInfo(OpenMPIRBuilder &OMPBuilder) : OMPBuilder(OMPBuilder) {} @@ -1687,11 +1664,11 @@ hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, } OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( - const LocationDescription &Loc, InsertPointTy OuterAllocaIP, - BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, - FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, - omp::ProcBindKind ProcBind, bool IsCancellable) { - assert(!isConflictIP(Loc.IP, OuterAllocaIP) && "IPs must not be ambiguous"); + const LocationDescription &Loc, InsertPointTy OuterAllocIP, + ArrayRef OuterDeallocIPs, BodyGenCallbackTy BodyGenCB, + PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, + Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) { + assert(!isConflictIP(Loc.IP, OuterAllocIP) && "IPs must not be ambiguous"); if (!updateToLocation(Loc)) return Loc.IP; @@ -1731,7 +1708,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( // Save the outer alloca block because the insertion iterator may get // invalidated and we still need this later. - BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock(); + BasicBlock *OuterAllocaBlock = OuterAllocIP.getBlock(); // Vector to remember instructions we used only during the modeling but which // we want to delete at the end. @@ -1827,7 +1804,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( // Let the caller create the body. 
assert(BodyGenCB && "Expected body generation callback!"); InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin()); - if (Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP)) + InsertPointTy DeallocIP(PRegExitBB, PRegExitBB->begin()); + if (Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP, DeallocIP)) return Err; LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n"); @@ -1840,35 +1818,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( // If OuterFn is not a Generic kernel, skip custom allocation. This causes // the CodeExtractor to follow its default behavior. Otherwise, we need to // use device shared memory to allocate argument structures. - if (ExecMode && *ExecMode & OMP_TGT_EXEC_MODE_GENERIC) { - auto Info = std::make_unique(*this); - - // Instead of using the insertion point provided by the CodeExtractor, - // here we need to use the block that eventually calls the outlined - // function for the `parallel` construct. - // - // The reason is that the explicit deallocation call will be inserted - // within the outlined function, whereas the alloca insertion point - // might actually be located somewhere else in the caller. This becomes - // a problem when e.g. `parallel` is inside of a `distribute` construct, - // because the deallocation would be executed multiple times and the - // allocation just once (outside of the loop). - // - // TODO: Ideally, we'd want to do the allocation and deallocation - // outside of the `parallel` outlined function, hence using here the - // insertion point provided by the CodeExtractor. We can't do this at - // the moment because there is currently no way of passing an eligible - // insertion point for the explicit deallocation to the CodeExtractor, - // as that block is created (at least when nested inside of - // `distribute`) sometime after createParallel() completed, so it can't - // be stored in the OutlineInfo structure here. 
- // - // The current approach results in an explicit allocation and - // deallocation pair for each `distribute` loop iteration in that case, - // which is suboptimal. - Info->AllocBlockOverride = EntryBB; - return Info; - } + if (ExecMode && *ExecMode & OMP_TGT_EXEC_MODE_GENERIC) + return std::make_unique(*this); } return std::make_unique(); }(); @@ -1890,9 +1841,12 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( }; } - OI->OuterAllocaBB = OuterAllocaBlock; + OI->OuterAllocBB = OuterAllocaBlock; OI->EntryBB = PRegEntryBB; OI->ExitBB = PRegExitBB; + OI->OuterDeallocBBs.reserve(OuterDeallocIPs.size()); + for (InsertPointTy DeallocIP : OuterDeallocIPs) + OI->OuterDeallocBBs.push_back(DeallocIP.getBlock()); SmallPtrSet ParallelRegionBlockSet; SmallVector Blocks; @@ -1907,7 +1861,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( /* AllowVarArgs */ true, /* AllowAlloca */ true, /* AllocationBlock */ OuterAllocaBlock, - /* DeallocationBlock */ nullptr, + /* DeallocationBlocks */ {}, /* Suffix */ ".omp_par", ArgsInZeroAddressSpace); // Find inputs to, outputs from the code region. @@ -1953,7 +1907,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( IRBuilder<>::InsertPointGuard Guard(Builder); LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n"); - Builder.restoreIP(OuterAllocaIP); + Builder.restoreIP(OuterAllocIP); Value *Ptr = Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded"); @@ -2005,7 +1959,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( // Reset the outer alloca insertion point to the entry of the relevant block // in case it was invalidated. 
- OuterAllocaIP = IRBuilder<>::InsertPoint( + OuterAllocIP = IRBuilder<>::InsertPoint( OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt()); for (Value *Input : Inputs) { @@ -2171,10 +2125,10 @@ static Value *emitTaskDependencies( } OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask( - const LocationDescription &Loc, InsertPointTy AllocaIP, - BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition, - SmallVector Dependencies, bool Mergeable, Value *EventHandle, - Value *Priority) { + const LocationDescription &Loc, InsertPointTy AllocIP, + ArrayRef DeallocIPs, BodyGenCallbackTy BodyGenCB, bool Tied, + Value *Final, Value *IfCondition, SmallVector Dependencies, + bool Mergeable, Value *EventHandle, Value *Priority) { if (!updateToLocation(Loc)) return InsertPointTy(); @@ -2206,18 +2160,22 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask( InsertPointTy TaskAllocaIP = InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin()); InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin()); - if (Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP)) + InsertPointTy TaskDeallocIP = InsertPointTy(TaskExitBB, TaskExitBB->begin()); + if (Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP, TaskDeallocIP)) return Err; auto OI = std::make_unique(); OI->EntryBB = TaskAllocaBB; - OI->OuterAllocaBB = AllocaIP.getBlock(); + OI->OuterAllocBB = AllocIP.getBlock(); OI->ExitBB = TaskExitBB; + OI->OuterDeallocBBs.reserve(DeallocIPs.size()); + for (InsertPointTy DeallocIP : DeallocIPs) + OI->OuterDeallocBBs.push_back(DeallocIP.getBlock()); // Add the thread ID argument. 
SmallVector ToBeDeleted; OI->ExcludeArgsFromAggregate.push_back(createFakeIntVal( - Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false)); + Builder, AllocIP, ToBeDeleted, TaskAllocaIP, "global.tid", false)); OI->PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies, Mergeable, Priority, EventHandle, TaskAllocaBB, @@ -2434,10 +2392,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask( return Builder.saveIP(); } -OpenMPIRBuilder::InsertPointOrErrorTy -OpenMPIRBuilder::createTaskgroup(const LocationDescription &Loc, - InsertPointTy AllocaIP, - BodyGenCallbackTy BodyGenCB) { +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskgroup( + const LocationDescription &Loc, InsertPointTy AllocIP, + ArrayRef DeallocIPs, BodyGenCallbackTy BodyGenCB) { if (!updateToLocation(Loc)) return InsertPointTy(); @@ -2452,7 +2409,7 @@ OpenMPIRBuilder::createTaskgroup(const LocationDescription &Loc, Builder.CreateCall(TaskgroupFn, {Ident, ThreadID}); BasicBlock *TaskgroupExitBB = splitBB(Builder, true, "taskgroup.exit"); - if (Error Err = BodyGenCB(AllocaIP, Builder.saveIP())) + if (Error Err = BodyGenCB(AllocIP, Builder.saveIP(), DeallocIPs)) return Err; Builder.SetInsertPoint(TaskgroupExitBB); @@ -2521,8 +2478,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections( SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB); Builder.SetInsertPoint(CaseBB); BranchInst *CaseEndBr = Builder.CreateBr(Continue); - if (Error Err = SectionCB(InsertPointTy(), {CaseEndBr->getParent(), - CaseEndBr->getIterator()})) + if (Error Err = + SectionCB(InsertPointTy(), + {CaseEndBr->getParent(), CaseEndBr->getIterator()}, {})) return Err; CaseNumber++; } @@ -4378,8 +4336,8 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR( } // Allocate temporary buffer by master thread - auto BodyGenCB = [&](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) -> Error { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy 
CodeGenIP, + ArrayRef DeallocIPs) -> Error { Builder.restoreIP(CodeGenIP); Value *AllocSpan = Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1)); @@ -4418,8 +4376,8 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR( Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR( ArrayRef ReductionInfos, ScanInfo *ScanRedInfo) { - auto BodyGenCB = [&](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) -> Error { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) -> Error { Builder.restoreIP(CodeGenIP); for (ReductionInfo RedInfo : ReductionInfos) { Value *PrivateVar = RedInfo.PrivateVariable; @@ -4470,8 +4428,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction( if (!updateToLocation(Loc)) return Loc.IP; - auto BodyGenCB = [&](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) -> Error { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) -> Error { Builder.restoreIP(CodeGenIP); Function *CurFn = Builder.GetInsertBlock()->getParent(); // for (int k = 0; k <= ceil(log2(n)); ++k) @@ -5317,13 +5275,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoopTarget( Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); auto OI = std::make_unique(); - OI->OuterAllocaBB = CLI->getPreheader(); + OI->OuterAllocBB = CLI->getPreheader(); Function *OuterFn = CLI->getPreheader()->getParent(); // Instructions which need to be deleted at the end of code generation SmallVector ToBeDeleted; - OI->OuterAllocaBB = AllocaIP.getBlock(); + OI->OuterAllocBB = AllocaIP.getBlock(); // Mark the body loop as region which needs to be extracted OI->EntryBB = CLI->getBody(); @@ -5360,7 +5318,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoopTarget( /* AllowVarArgs */ true, /* AllowAlloca */ true, /* AllocationBlock */ CLI->getPreheader(), - /* DeallocationBlock */ nullptr, + /* DeallocationBlocks */ {}, /* Suffix */ ".omp_wsloop", /* 
AggrArgsIn0AddrSpace */ true); @@ -6717,8 +6675,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion( emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional); // generate body - if (Error Err = BodyGenCB(/* AllocaIP */ InsertPointTy(), - /* CodeGenIP */ Builder.saveIP())) + if (Error Err = + BodyGenCB(/* AllocIP */ InsertPointTy(), + /* CodeGenIP */ Builder.saveIP(), /* DeallocIPs */ {})) return Err; // emit exit call and do any needed finalization. @@ -7360,10 +7319,11 @@ Constant *OpenMPIRBuilder::registerTargetRegionFunction( } OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData( - const LocationDescription &Loc, InsertPointTy AllocaIP, - InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, - TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, - CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc, + const LocationDescription &Loc, InsertPointTy AllocIP, + InsertPointTy CodeGenIP, ArrayRef DeallocIPs, + Value *DeviceID, Value *IfCond, TargetDataInfo &Info, + GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, + omp::RuntimeFunction *MapperFunc, function_ref BodyGenCB, @@ -7389,11 +7349,11 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData( // Generate the code for the opening of the data environment. Capture all the // arguments of the runtime call by reference because they are used in the // closing of the region. 
- auto BeginThenGen = [&](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) -> Error { + auto BeginThenGen = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) -> Error { MapInfo = &GenMapInfoCB(Builder.saveIP()); if (Error Err = emitOffloadingArrays( - AllocaIP, Builder.saveIP(), *MapInfo, Info, CustomMapperCB, + AllocIP, Builder.saveIP(), *MapInfo, Info, CustomMapperCB, /*IsNonContiguous=*/true, DeviceAddrCB)) return Err; @@ -7447,7 +7407,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData( cantFail(TaskBodyCB(/*DeviceID=*/nullptr, /*RTLoc=*/nullptr, /*TargetTaskAllocaIP=*/{})); else - cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP, + cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocIP, /*Dependencies=*/{}, RTArgs, Info.HasNoWait)); } else { Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr( @@ -7478,8 +7438,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData( // If we need device pointer privatization, we need to emit the body of the // region with no privatization in the 'else' branch of the conditional. // Otherwise, we don't have to do anything. - auto BeginElseGen = [&](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) -> Error { + auto BeginElseGen = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) -> Error { InsertPointOrErrorTy AfterIP = BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv); if (!AfterIP) @@ -7489,7 +7449,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData( }; // Generate code for the closing of the data region. 
- auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto EndThenGen = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { TargetDataRTArgs RTArgs; Info.EmitDebug = !MapInfo->Names.empty(); emitOffloadingArraysArgument(Builder, RTArgs, Info, /*ForEndCall=*/true); @@ -7518,7 +7479,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData( // We don't have to do anything to close the region if the if clause evaluates // to false. - auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto EndElseGen = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { return Error::success(); }; @@ -7526,8 +7488,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData( if (BodyGenCB) { Error Err = [&]() { if (IfCond) - return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP); - return BeginThenGen(AllocaIP, Builder.saveIP()); + return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocIP); + return BeginThenGen(AllocIP, Builder.saveIP(), DeallocIPs); }(); if (Err) @@ -7542,12 +7504,12 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData( restoreIPandDebugLoc(Builder, *AfterIP); if (IfCond) - return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP); - return EndThenGen(AllocaIP, Builder.saveIP()); + return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocIP); + return EndThenGen(AllocIP, Builder.saveIP(), DeallocIPs); } if (IfCond) - return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP); - return BeginThenGen(AllocaIP, Builder.saveIP()); + return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocIP); + return BeginThenGen(AllocIP, Builder.saveIP(), DeallocIPs); }(); if (Err) @@ -7759,15 +7721,18 @@ static Expected createOutlinedFunction( if (OMPBuilder.Config.isTargetDevice()) OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func); - // Insert target deinit call in the device compilation pass. 
+ BasicBlock *ExitBB = splitBB(Builder, /*CreateBranch=*/true, "target.exit"); BasicBlock *OutlinedBodyBB = splitBB(Builder, /*CreateBranch=*/true, "outlined.body"); llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = CBFunc( Builder.saveIP(), - OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->begin())); + OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->begin()), + OpenMPIRBuilder::InsertPointTy(ExitBB, ExitBB->begin())); if (!AfterIP) return AfterIP.takeError(); - Builder.restoreIP(*AfterIP); + Builder.SetInsertPoint(ExitBB); + + // Insert target deinit call in the device compilation pass. if (OMPBuilder.Config.isTargetDevice()) OMPBuilder.createTargetDeinit(Builder); @@ -8216,7 +8181,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask( auto OI = std::make_unique(); OI->EntryBB = TargetTaskAllocaBB; - OI->OuterAllocaBB = AllocaIP.getBlock(); + OI->OuterAllocBB = AllocaIP.getBlock(); // Add the thread ID argument. SmallVector ToBeDeleted; @@ -8478,7 +8443,8 @@ Error OpenMPIRBuilder::emitOffloadingArraysAndArgs( static void emitTargetCall( OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, - OpenMPIRBuilder::InsertPointTy AllocaIP, + OpenMPIRBuilder::InsertPointTy AllocIP, + ArrayRef DeallocIPs, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, @@ -8535,8 +8501,9 @@ static void emitTargetCall( }; auto &&EmitTargetCallElse = - [&](OpenMPIRBuilder::InsertPointTy AllocaIP, - OpenMPIRBuilder::InsertPointTy CodeGenIP) -> Error { + [&](OpenMPIRBuilder::InsertPointTy AllocIP, + OpenMPIRBuilder::InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) -> Error { // Assume no error was returned because EmitTargetCallFallbackCB doesn't // produce any. OpenMPIRBuilder::InsertPointTy AfterIP = cantFail([&]() { @@ -8546,7 +8513,7 @@ static void emitTargetCall( // OutlinedFnID=nullptr results in that call not being done. 
OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs; return OMPBuilder.emitTargetTask(TaskBodyCB, /*DeviceID=*/nullptr, - /*RTLoc=*/nullptr, AllocaIP, + /*RTLoc=*/nullptr, AllocIP, Dependencies, EmptyRTArgs, HasNoWait); } return EmitTargetCallFallbackCB(Builder.saveIP()); @@ -8557,13 +8524,14 @@ static void emitTargetCall( }; auto &&EmitTargetCallThen = - [&](OpenMPIRBuilder::InsertPointTy AllocaIP, - OpenMPIRBuilder::InsertPointTy CodeGenIP) -> Error { + [&](OpenMPIRBuilder::InsertPointTy AllocIP, + OpenMPIRBuilder::InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) -> Error { Info.HasNoWait = HasNoWait; OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP()); OpenMPIRBuilder::TargetDataRTArgs RTArgs; if (Error Err = OMPBuilder.emitOffloadingArraysAndArgs( - AllocaIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB, + AllocIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false)) return Err; @@ -8636,13 +8604,13 @@ static void emitTargetCall( // The presence of certain clauses on the target directive require the // explicit generation of the target task. if (RequiresOuterTargetTask) - return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP, + return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocIP, Dependencies, KArgs.RTArgs, Info.HasNoWait); return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs, - DeviceID, RTLoc, AllocaIP); + DeviceID, RTLoc, AllocIP); }()); Builder.restoreIP(AfterIP); @@ -8653,24 +8621,24 @@ static void emitTargetCall( // wasn't created. In this case we just run the host fallback directly and // ignore any potential 'if' clauses. if (!OutlinedFnID) { - cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP())); + cantFail(EmitTargetCallElse(AllocIP, Builder.saveIP(), DeallocIPs)); return; } // If there's no 'if' clause, only generate the kernel launch code path. 
if (!IfCond) { - cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP())); + cantFail(EmitTargetCallThen(AllocIP, Builder.saveIP(), DeallocIPs)); return; } cantFail(OMPBuilder.emitIfClause(IfCond, EmitTargetCallThen, - EmitTargetCallElse, AllocaIP)); + EmitTargetCallElse, AllocIP)); } OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget( - const LocationDescription &Loc, bool IsOffloadEntry, InsertPointTy AllocaIP, - InsertPointTy CodeGenIP, TargetDataInfo &Info, - TargetRegionEntryInfo &EntryInfo, + const LocationDescription &Loc, bool IsOffloadEntry, InsertPointTy AllocIP, + InsertPointTy CodeGenIP, ArrayRef DeallocIPs, + TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl &Inputs, GenMapInfoCallbackTy GenMapInfoCB, @@ -8698,9 +8666,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget( // to make a remote call (offload) to the previously outlined function // that represents the target region. Do that now. if (!Config.isTargetDevice()) - emitTargetCall(*this, Builder, AllocaIP, Info, DefaultAttrs, RuntimeAttrs, - IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB, - CustomMapperCB, Dependencies, HasNowait); + emitTargetCall(*this, Builder, AllocIP, DeallocIPs, Info, DefaultAttrs, + RuntimeAttrs, IfCond, OutlinedFn, OutlinedFnID, Inputs, + GenMapInfoCB, CustomMapperCB, Dependencies, HasNowait); return Builder.saveIP(); } @@ -9479,15 +9447,16 @@ void OpenMPIRBuilder::emitBlock(BasicBlock *BB, Function *CurFn, Error OpenMPIRBuilder::emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, - InsertPointTy AllocaIP) { + InsertPointTy AllocIP, + ArrayRef DeallocIPs) { // If the condition constant folds and can be elided, try to avoid emitting // the condition and the dead arm of the if/else. 
if (auto *CI = dyn_cast(Cond)) { auto CondConstant = CI->getSExtValue(); if (CondConstant) - return ThenGen(AllocaIP, Builder.saveIP()); + return ThenGen(AllocIP, Builder.saveIP(), DeallocIPs); - return ElseGen(AllocaIP, Builder.saveIP()); + return ElseGen(AllocIP, Builder.saveIP(), DeallocIPs); } Function *CurFn = Builder.GetInsertBlock()->getParent(); @@ -9500,13 +9469,13 @@ Error OpenMPIRBuilder::emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, Builder.CreateCondBr(Cond, ThenBlock, ElseBlock); // Emit the 'then' code. emitBlock(ThenBlock, CurFn); - if (Error Err = ThenGen(AllocaIP, Builder.saveIP())) + if (Error Err = ThenGen(AllocIP, Builder.saveIP(), DeallocIPs)) return Err; emitBranch(ContBlock); // Emit the 'else' code if present. // There is no need to emit line number for unconditional branch. emitBlock(ElseBlock, CurFn); - if (Error Err = ElseGen(AllocaIP, Builder.saveIP())) + if (Error Err = ElseGen(AllocIP, Builder.saveIP(), DeallocIPs)) return Err; // There is no need to emit line number for unconditional branch. emitBranch(ContBlock); @@ -10214,13 +10183,14 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc, // Generate the body of teams. InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin()); InsertPointTy CodeGenIP(BodyBB, BodyBB->begin()); - if (Error Err = BodyGenCB(AllocaIP, CodeGenIP)) + InsertPointTy DeallocIP(ExitBB, ExitBB->begin()); + if (Error Err = BodyGenCB(AllocaIP, CodeGenIP, DeallocIP)) return Err; auto OI = std::make_unique(); OI->EntryBB = AllocaBB; OI->ExitBB = ExitBB; - OI->OuterAllocaBB = &OuterAllocaBB; + OI->OuterAllocBB = &OuterAllocaBB; // Insert fake values for global tid and bound tid. 
SmallVector ToBeDeleted; @@ -10276,14 +10246,13 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc, return Builder.saveIP(); } -OpenMPIRBuilder::InsertPointOrErrorTy -OpenMPIRBuilder::createDistribute(const LocationDescription &Loc, - InsertPointTy OuterAllocaIP, - BodyGenCallbackTy BodyGenCB) { +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createDistribute( + const LocationDescription &Loc, InsertPointTy OuterAllocIP, + ArrayRef OuterDeallocIPs, BodyGenCallbackTy BodyGenCB) { if (!updateToLocation(Loc)) return InsertPointTy(); - BasicBlock *OuterAllocaBB = OuterAllocaIP.getBlock(); + BasicBlock *OuterAllocaBB = OuterAllocIP.getBlock(); if (OuterAllocaBB == Builder.GetInsertBlock()) { BasicBlock *BodyBB = @@ -10300,16 +10269,21 @@ OpenMPIRBuilder::createDistribute(const LocationDescription &Loc, // Generate the body of distribute clause InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin()); InsertPointTy CodeGenIP(BodyBB, BodyBB->begin()); - if (Error Err = BodyGenCB(AllocaIP, CodeGenIP)) + InsertPointTy DeallocIP(ExitBB, ExitBB->begin()); + if (Error Err = BodyGenCB(AllocaIP, CodeGenIP, DeallocIP)) return Err; // When using target we use different runtime functions which require a // callback. 
if (Config.isTargetDevice()) { auto OI = std::make_unique(); - OI->OuterAllocaBB = OuterAllocaIP.getBlock(); + OI->OuterAllocBB = OuterAllocIP.getBlock(); OI->EntryBB = AllocaBB; OI->ExitBB = ExitBB; + OI->OuterDeallocBBs.reserve(OuterDeallocIPs.size()); + for (InsertPointTy DeallocIP : OuterDeallocIPs) + OI->OuterDeallocBBs.push_back(DeallocIP.getBlock()); + addOutlineInfo(std::move(OI)); } Builder.SetInsertPoint(ExitBB, ExitBB->begin()); @@ -10374,32 +10348,33 @@ std::unique_ptr OpenMPIRBuilder::OutlineInfo::createCodeExtractor(ArrayRef Blocks, bool ArgsInZeroAddressSpace, Twine Suffix) { - return std::make_unique(Blocks, /* DominatorTree */ nullptr, - /* AggregateArgs */ true, - /* BlockFrequencyInfo */ nullptr, - /* BranchProbabilityInfo */ nullptr, - /* AssumptionCache */ nullptr, - /* AllowVarArgs */ true, - /* AllowAlloca */ true, - /* AllocationBlock*/ OuterAllocaBB, - /* DeallocationBlock */ nullptr, - /* Suffix */ Suffix.str(), - ArgsInZeroAddressSpace); + return std::make_unique( + Blocks, /* DominatorTree */ nullptr, + /* AggregateArgs */ true, + /* BlockFrequencyInfo */ nullptr, + /* BranchProbabilityInfo */ nullptr, + /* AssumptionCache */ nullptr, + /* AllowVarArgs */ true, + /* AllowAlloca */ true, + /* AllocationBlock*/ OuterAllocBB, + /* DeallocationBlocks */ ArrayRef(), + /* Suffix */ Suffix.str(), ArgsInZeroAddressSpace); } std::unique_ptr DeviceSharedMemOutlineInfo::createCodeExtractor( ArrayRef Blocks, bool ArgsInZeroAddressSpace, Twine Suffix) { - // TODO: Initialize the DeallocationBlock with a proper pair to OuterAllocaBB. 
return std::make_unique( - OMPBuilder, AllocBlockOverride, Blocks, /* DominatorTree */ nullptr, + OMPBuilder, Blocks, /* DominatorTree */ nullptr, /* AggregateArgs */ true, /* BlockFrequencyInfo */ nullptr, /* BranchProbabilityInfo */ nullptr, /* AssumptionCache */ nullptr, /* AllowVarArgs */ true, /* AllowAlloca */ true, - /* AllocationBlock*/ OuterAllocaBB, - /* DeallocationBlock */ ExitBB, + /* AllocationBlock*/ OuterAllocBB, + /* DeallocationBlocks */ OuterDeallocBBs.empty() + ? SmallVector{ExitBB} + : OuterDeallocBBs, /* Suffix */ Suffix.str(), ArgsInZeroAddressSpace); } diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp index 57809017a75a4..e8f3c68f90980 100644 --- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -721,7 +721,7 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) { SubRegion, &*DT, /* AggregateArgs */ false, /* BFI */ nullptr, /* BPI */ nullptr, AC, /* AllowVarArgs */ false, /* AllowAlloca */ false, /* AllocaBlock */ nullptr, - /* DeallocationBlock */ nullptr, + /* DeallocationBlocks */ {}, /* Suffix */ "cold." 
+ std::to_string(OutlinedFunctionID)); if (CE.isEligible() && isSplittingBeneficial(CE, SubRegion, TTI) && diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index 177c10ef53040..cbfb4c287fc8b 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -2826,7 +2826,7 @@ unsigned IROutliner::doOutline(Module &M) { OS->Candidate->getBasicBlocks(BlocksInRegion, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, - false, nullptr, nullptr, "outlined"); + false, nullptr, {}, "outlined"); findAddInputsOutputs(M, *OS, NotSame); if (!OS->IgnoreRegion) OutlinedRegions.push_back(OS); @@ -2937,7 +2937,7 @@ unsigned IROutliner::doOutline(Module &M) { OS->Candidate->getBasicBlocks(BlocksInRegion, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, - false, nullptr, nullptr, "outlined"); + false, nullptr, {}, "outlined"); bool FunctionOutlined = extractSection(*OS); if (FunctionOutlined) { unsigned StartIdx = OS->Candidate->getStartIdx(); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index d9c8412008a21..20fcb7307ff7d 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1086,7 +1086,8 @@ struct OpenMPOpt { SmallDenseMap> BB2PRMap; BasicBlock *StartBB = nullptr, *EndBB = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { BasicBlock *CGStartBB = CodeGenIP.getBlock(); BasicBlock *CGEndBB = SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); @@ -1126,7 +1127,8 @@ struct OpenMPOpt { const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); ParentBB->getTerminator()->eraseFromParent(); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy 
CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { BasicBlock *CGStartBB = CodeGenIP.getBlock(); BasicBlock *CGEndBB = SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); @@ -1256,8 +1258,9 @@ struct OpenMPOpt { // avoid overriding binding settings, and without explicit cancellation. OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(OMPInfoCache.OMPBuilder.createParallel( - Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, - OMP_PROC_BIND_default, /* IsCancellable */ false)); + Loc, AllocaIP, /* DeallocIPs */ {}, BodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, + /* IsCancellable */ false)); BranchInst::Create(AfterBB, AfterIP.getBlock()); // Perform the actual outlining. diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 3339f5e4fea7d..c484968076249 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -264,11 +264,11 @@ CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, BranchProbabilityInfo *BPI, AssumptionCache *AC, bool AllowVarArgs, bool AllowAlloca, BasicBlock *AllocationBlock, - BasicBlock *DeallocationBlock, std::string Suffix, - bool ArgsInZeroAddressSpace) + ArrayRef DeallocationBlocks, + std::string Suffix, bool ArgsInZeroAddressSpace) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), BPI(BPI), AC(AC), AllocationBlock(AllocationBlock), - DeallocationBlock(DeallocationBlock), AllowVarArgs(AllowVarArgs), + DeallocationBlocks(DeallocationBlocks), AllowVarArgs(AllowVarArgs), Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)), Suffix(Suffix), ArgsInZeroAddressSpace(ArgsInZeroAddressSpace) {} @@ -2029,22 +2029,25 @@ CallInst *CodeExtractor::emitReplacerCall( {}, call); // Deallocate intermediate variables if they need explicit deallocation. 
- BasicBlock *DeallocBlock = codeReplacer; - BasicBlock::iterator DeallocIP = codeReplacer->end(); - if (DeallocationBlock) { - DeallocBlock = DeallocationBlock; - DeallocIP = DeallocationBlock->getFirstInsertionPt(); - } + auto deallocVars = [&](BasicBlock *DeallocBlock, + BasicBlock::iterator DeallocIP) { + int Index = 0; + for (Value *Output : outputs) { + if (!StructValues.contains(Output)) + deallocateVar(DeallocBlock, DeallocIP, ReloadOutputs[Index++], + Output->getType()); + } - int Index = 0; - for (Value *Output : outputs) { - if (!StructValues.contains(Output)) - deallocateVar(DeallocBlock, DeallocIP, ReloadOutputs[Index++], - Output->getType()); - } + if (Struct) + deallocateVar(DeallocBlock, DeallocIP, Struct, StructArgTy); + }; - if (Struct) - deallocateVar(DeallocBlock, DeallocIP, Struct, StructArgTy); + if (DeallocationBlocks.empty()) { + deallocVars(codeReplacer, codeReplacer->end()); + } else { + for (BasicBlock *DeallocationBlock : DeallocationBlocks) + deallocVars(DeallocationBlock, DeallocationBlock->getFirstInsertionPt()); + } return call; } diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index c13570dc803b3..1e5b8145d5cdc 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -55,8 +55,9 @@ using namespace omp; } #define BODYGENCB_WRAPPER(cb) \ - [&cb](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) -> Error { \ - cb(AllocaIP, CodeGenIP); \ + [&cb](InsertPointTy AllocIP, InsertPointTy CodeGenIP, \ + ArrayRef DeallocIPs) -> Error { \ + cb(AllocIP, CodeGenIP, DeallocIPs); \ return Error::success(); \ } @@ -664,10 +665,11 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) { unsigned NumPrivatizedVars = 0; unsigned NumFinalizationPoints = 0; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { 
++NumBodiesGenerated; - Builder.restoreIP(AllocaIP); + Builder.restoreIP(AllocIP); PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); Builder.CreateStore(F->arg_begin(), PrivAI); @@ -715,8 +717,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) { F->getEntryBlock().getFirstInsertionPt()); ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createParallel( - Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, - nullptr, OMP_PROC_BIND_default, false)); + Loc, AllocaIP, {}, BodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false)); EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 1U); @@ -777,10 +779,11 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) { unsigned NumPrivatizedVars = 0; unsigned NumFinalizationPoints = 0; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { ++NumBodiesGenerated; - Builder.restoreIP(AllocaIP); + Builder.restoreIP(AllocIP); PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); Builder.CreateStore(F->arg_begin(), PrivAI); @@ -828,8 +831,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) { F->getEntryBlock().getFirstInsertionPt()); ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createParallel( - Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, - nullptr, OMP_PROC_BIND_default, false)); + Loc, AllocaIP, {}, BodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false)); EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 1U); EXPECT_EQ(NumFinalizationPoints, 1U); @@ -885,7 +888,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) { unsigned NumOuterBodiesGenerated = 0; unsigned NumFinalizationPoints = 0; - auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto InnerBodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { ++NumInnerBodiesGenerated; return 
Error::success(); }; @@ -908,7 +912,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) { return Error::success(); }; - auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto OuterBodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { ++NumOuterBodiesGenerated; Builder.restoreIP(CodeGenIP); BasicBlock *CGBB = CodeGenIP.getBlock(); @@ -917,7 +922,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, - OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP, + OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocIP, {}, InnerBodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false)); @@ -929,7 +934,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) { F->getEntryBlock().getFirstInsertionPt()); ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createParallel( - Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB), + Loc, AllocaIP, {}, BODYGENCB_WRAPPER(OuterBodyGenCB), PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false)); @@ -986,7 +991,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { unsigned NumOuterBodiesGenerated = 0; unsigned NumFinalizationPoints = 0; - auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto InnerBodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { ++NumInnerBodiesGenerated; return Error::success(); }; @@ -1009,7 +1015,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { return Error::success(); }; - auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto OuterBodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { ++NumOuterBodiesGenerated; Builder.restoreIP(CodeGenIP); BasicBlock *CGBB = CodeGenIP.getBlock(); @@ -1022,18 +1029,18 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { ASSERT_EXPECTED_INIT( 
OpenMPIRBuilder::InsertPointTy, AfterIP1, - OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP, + OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocIP, {}, InnerBodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false)); Builder.restoreIP(AfterIP1); Builder.CreateBr(NewBB1); - ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP2, - OMPBuilder.createParallel( - InsertPointTy(NewBB1, NewBB1->end()), AllocaIP, - InnerBodyGenCB, PrivCB, FiniCB, nullptr, nullptr, - OMP_PROC_BIND_default, false)); + ASSERT_EXPECTED_INIT( + OpenMPIRBuilder::InsertPointTy, AfterIP2, + OMPBuilder.createParallel(InsertPointTy(NewBB1, NewBB1->end()), AllocIP, + {}, InnerBodyGenCB, PrivCB, FiniCB, nullptr, + nullptr, OMP_PROC_BIND_default, false)); Builder.restoreIP(AfterIP2); Builder.CreateBr(NewBB2); @@ -1043,7 +1050,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { F->getEntryBlock().getFirstInsertionPt()); ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createParallel( - Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB), + Loc, AllocaIP, {}, BODYGENCB_WRAPPER(OuterBodyGenCB), PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false)); @@ -1107,10 +1114,11 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { unsigned NumPrivatizedVars = 0; unsigned NumFinalizationPoints = 0; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { ++NumBodiesGenerated; - Builder.restoreIP(AllocaIP); + Builder.restoreIP(AllocIP); PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); Builder.CreateStore(F->arg_begin(), PrivAI); @@ -1159,7 +1167,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { F->getEntryBlock().getFirstInsertionPt()); ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, - OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, + OMPBuilder.createParallel(Loc, 
AllocaIP, {}, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()), nullptr, OMP_PROC_BIND_default, false)); @@ -1214,7 +1222,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { unsigned NumFinalizationPoints = 0; CallInst *CheckedBarrier = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { ++NumBodiesGenerated; Builder.restoreIP(CodeGenIP); @@ -1282,11 +1291,12 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); - ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, - OMPBuilder.createParallel( - Loc, AllocaIP, BODYGENCB_WRAPPER(BodyGenCB), PrivCB, - FiniCB, Builder.CreateIsNotNull(F->arg_begin()), - nullptr, OMP_PROC_BIND_default, true)); + ASSERT_EXPECTED_INIT( + OpenMPIRBuilder::InsertPointTy, AfterIP, + OMPBuilder.createParallel(Loc, AllocaIP, {}, BODYGENCB_WRAPPER(BodyGenCB), + PrivCB, FiniCB, + Builder.CreateIsNotNull(F->arg_begin()), + nullptr, OMP_PROC_BIND_default, true)); EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 0U); @@ -1351,7 +1361,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) { Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc); Instruction *Internal; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { IRBuilder<>::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); Internal = Builder.CreateCall(TakeI32Func, I32Val); @@ -1371,8 +1382,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) { F->getEntryBlock().getFirstInsertionPt()); ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createParallel( - Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, - nullptr, OMP_PROC_BIND_default, false)); + Loc, 
AllocaIP, {}, BodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false)); Builder.restoreIP(AfterIP); Builder.CreateRetVoid(); @@ -2875,9 +2886,10 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) { BasicBlock *EntryBB = nullptr; BasicBlock *ThenBB = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - if (AllocaIP.isSet()) - Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { + if (AllocIP.isSet()) + Builder.restoreIP(AllocIP); else Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); @@ -2956,9 +2968,10 @@ TEST_F(OpenMPIRBuilderTest, MaskedDirective) { BasicBlock *EntryBB = nullptr; BasicBlock *ThenBB = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - if (AllocaIP.isSet()) - Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { + if (AllocIP.isSet()) + Builder.restoreIP(AllocIP); else Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); @@ -3035,7 +3048,8 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) { AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { // actual start for bodyCB llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); @@ -3286,7 +3300,8 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) { AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst"); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + 
ArrayRef DeallocIPs) { llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); @@ -3360,7 +3375,8 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) { AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst"); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); @@ -3467,9 +3483,10 @@ TEST_F(OpenMPIRBuilderTest, SingleDirective) { BasicBlock *EntryBB = nullptr; BasicBlock *ThenBB = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - if (AllocaIP.isSet()) - Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { + if (AllocIP.isSet()) + Builder.restoreIP(AllocIP); else Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); @@ -3560,9 +3577,10 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) { BasicBlock *EntryBB = nullptr; BasicBlock *ThenBB = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - if (AllocaIP.isSet()) - Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { + if (AllocIP.isSet()) + Builder.restoreIP(AllocIP); else Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); @@ -3681,9 +3699,10 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) { Function *CopyFunc = Function::Create(CopyFuncTy, Function::PrivateLinkage, "copy_var", *M); - auto BodyGenCB = [&](InsertPointTy 
AllocaIP, InsertPointTy CodeGenIP) { - if (AllocaIP.isSet()) - Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { + if (AllocIP.isSet()) + Builder.restoreIP(AllocIP); else Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); @@ -4545,8 +4564,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTeams) { AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty()); Value *Val128 = Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "load"); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { + Builder.restoreIP(AllocIP); AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, "bodygen.alloca128"); @@ -4626,7 +4646,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithThreadLimit) { Function::Create(FunctionType::get(Builder.getVoidTy(), false), GlobalValue::ExternalLinkage, "fakeFunction", M.get()); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); return Error::success(); @@ -4682,7 +4703,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsUpper) { Function::Create(FunctionType::get(Builder.getVoidTy(), false), GlobalValue::ExternalLinkage, "fakeFunction", M.get()); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); return Error::success(); @@ -4744,7 +4766,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsBoth) { Value *NumTeamsUpper = Builder.CreateAdd(F->arg_begin(), 
Builder.getInt32(10), "numTeamsUpper"); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); return Error::success(); @@ -4811,7 +4834,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsAndThreadLimit) { Function::Create(FunctionType::get(Builder.getVoidTy(), false), GlobalValue::ExternalLinkage, "fakeFunction", M.get()); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); return Error::success(); @@ -4868,7 +4892,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfCondition) { Function::Create(FunctionType::get(Builder.getVoidTy(), false), GlobalValue::ExternalLinkage, "fakeFunction", M.get()); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); return Error::success(); @@ -4935,7 +4960,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfConditionAndNumTeams) { Function::Create(FunctionType::get(Builder.getVoidTy(), false), GlobalValue::ExternalLinkage, "fakeFunction", M.get()); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); return Error::success(); @@ -5153,7 +5179,8 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { // xor of thread-id; // and store the result in global variables. 
InsertPointTy BodyIP, BodyAllocaIP; - auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy InnerAllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); @@ -5171,7 +5198,7 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { Builder.CreateStore(Xor, XorReduced); BodyIP = Builder.saveIP(); - BodyAllocaIP = InnerAllocaIP; + BodyAllocaIP = InnerAllocIP; return Error::success(); }; @@ -5207,12 +5234,12 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { // Do nothing in finalization. auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); }; - ASSERT_EXPECTED_INIT( - OpenMPIRBuilder::InsertPointTy, AfterIP, - OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB, - /* IfCondition */ nullptr, - /* NumThreads */ nullptr, OMP_PROC_BIND_default, - /* IsCancellable */ false)); + ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, + OMPBuilder.createParallel( + Loc, OuterAllocaIP, {}, BodyGenCB, PrivCB, FiniCB, + /* IfCondition */ nullptr, + /* NumThreads */ nullptr, OMP_PROC_BIND_default, + /* IsCancellable */ false)); Builder.restoreIP(AfterIP); OpenMPIRBuilder::ReductionInfo ReductionInfos[] = { @@ -5531,8 +5558,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { Builder.CreateStore(Builder.getInt32(1), XorReduced); InsertPointTy FirstBodyIP, FirstBodyAllocaIP; - auto FirstBodyGenCB = [&](InsertPointTy InnerAllocaIP, - InsertPointTy CodeGenIP) { + auto FirstBodyGenCB = [&](InsertPointTy InnerAllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); @@ -5547,13 +5574,14 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { Builder.CreateStore(Sum, SumReduced); FirstBodyIP = Builder.saveIP(); - FirstBodyAllocaIP = InnerAllocaIP; + FirstBodyAllocaIP = InnerAllocIP; return Error::success(); }; InsertPointTy 
SecondBodyIP, SecondBodyAllocaIP; - auto SecondBodyGenCB = [&](InsertPointTy InnerAllocaIP, - InsertPointTy CodeGenIP) { + auto SecondBodyGenCB = [&](InsertPointTy InnerAllocIP, + InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); @@ -5566,7 +5594,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { Builder.CreateStore(Xor, XorReduced); SecondBodyIP = Builder.saveIP(); - SecondBodyAllocaIP = InnerAllocaIP; + SecondBodyAllocaIP = InnerAllocIP; return Error::success(); }; @@ -5606,14 +5634,14 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP1, - OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB, + OMPBuilder.createParallel(Loc, OuterAllocaIP, {}, FirstBodyGenCB, PrivCB, FiniCB, /* IfCondition */ nullptr, /* NumThreads */ nullptr, OMP_PROC_BIND_default, /* IsCancellable */ false)); Builder.restoreIP(AfterIP1); ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP2, - OMPBuilder.createParallel({Builder.saveIP(), DL}, OuterAllocaIP, + OMPBuilder.createParallel({Builder.saveIP(), DL}, OuterAllocaIP, {}, SecondBodyGenCB, PrivCB, FiniCB, /* IfCondition */ nullptr, /* NumThreads */ nullptr, OMP_PROC_BIND_default, @@ -5707,7 +5735,8 @@ TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) { llvm::SmallVector SectionCBVector; auto FiniCB = [&](InsertPointTy IP) { return Error::success(); }; - auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto SectionCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { return Error::success(); }; SectionCBVector.push_back(SectionCB); @@ -5752,7 +5781,8 @@ TEST_F(OpenMPIRBuilderTest, CreateSections) { EXPECT_NE(IPBB->end(), IP.getPoint()); }; - auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto SectionCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { 
++NumBodiesGenerated; CaseBBs.push_back(CodeGenIP.getBlock()); SwitchBB = CodeGenIP.getBlock()->getSinglePredecessor(); @@ -6092,7 +6122,7 @@ TEST_F(OpenMPIRBuilderTest, TargetEnterData) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTargetData( - Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), + Loc, AllocaIP, Builder.saveIP(), {}, Builder.getInt64(DeviceID), /* IfCond= */ nullptr, Info, GenMapInfoCB, CustomMapperCB, &RTLFunc)); Builder.restoreIP(AfterIP); @@ -6155,7 +6185,7 @@ TEST_F(OpenMPIRBuilderTest, TargetExitData) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTargetData( - Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), + Loc, AllocaIP, Builder.saveIP(), {}, Builder.getInt64(DeviceID), /* IfCond= */ nullptr, Info, GenMapInfoCB, CustomMapperCB, &RTLFunc)); Builder.restoreIP(AfterIP); @@ -6266,7 +6296,7 @@ TEST_F(OpenMPIRBuilderTest, TargetDataRegion) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, TargetDataIP1, - OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(), + OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(), {}, Builder.getInt64(DeviceID), /* IfCond= */ nullptr, Info, GenMapInfoCB, CustomMapperCB, nullptr, BodyCB)); @@ -6295,7 +6325,7 @@ TEST_F(OpenMPIRBuilderTest, TargetDataRegion) { }; ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, TargetDataIP2, - OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(), + OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(), {}, Builder.getInt64(DeviceID), /* IfCond= */ nullptr, Info, GenMapInfoCB, CustomMapperCB, nullptr, BodyTargetCB)); @@ -6346,8 +6376,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) { Builder.CreateStore(Builder.getInt32(10), APtr); Builder.CreateStore(Builder.getInt32(20), BPtr); - auto BodyGenCB = [&](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) -> InsertPointTy { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef 
DeallocIPs) -> InsertPointTy { IRBuilderBase::InsertPointGuard guard(Builder); Builder.SetCurrentDebugLocation(llvm::DebugLoc()); Builder.restoreIP(CodeGenIP); @@ -6417,10 +6447,10 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), - Builder.saveIP(), Info, EntryInfo, DefaultAttrs, - RuntimeAttrs, /*IfCond=*/nullptr, Inputs, - GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB, - CustomMapperCB, {}, false)); + Builder.saveIP(), {}, Info, EntryInfo, + DefaultAttrs, RuntimeAttrs, /*IfCond=*/nullptr, + Inputs, GenMapInfoCB, BodyGenCB, + SimpleArgAccessorCB, CustomMapperCB, {}, false)); EXPECT_EQ(DL, Builder.getCurrentDebugLocation()); Builder.restoreIP(AfterIP); @@ -6565,8 +6595,9 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) { }; auto CustomMapperCB = [&](unsigned int I) { return nullptr; }; - auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP, - OpenMPIRBuilder::InsertPointTy CodeGenIP) + auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocIP, + OpenMPIRBuilder::InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) -> OpenMPIRBuilder::InsertPointTy { IRBuilderBase::InsertPointGuard guard(Builder); Builder.SetCurrentDebugLocation(llvm::DebugLoc()); @@ -6591,7 +6622,7 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, - Info, EntryInfo, DefaultAttrs, RuntimeAttrs, + {}, Info, EntryInfo, DefaultAttrs, RuntimeAttrs, /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB, CustomMapperCB, {}, false)); @@ -6672,7 +6703,14 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) { Instruction *Value1 = &*OutlinedBlock->getFirstNonPHIIt(); EXPECT_EQ(Value1, Value); EXPECT_EQ(Value1->getNextNode(), TargetStore); - auto *Deinit = TargetStore->getNextNode(); + + auto *TargetExitBlockBr 
= TargetStore->getNextNode(); + EXPECT_TRUE(isa(TargetExitBlockBr)); + + auto *TargetExitBlock = TargetExitBlockBr->getSuccessor(0); + EXPECT_EQ(TargetExitBlock->getName(), "target.exit"); + + Instruction *Deinit = &*TargetExitBlock->getFirstNonPHIIt(); EXPECT_NE(Deinit, nullptr); auto *DeinitCall = dyn_cast(Deinit); @@ -6719,8 +6757,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionSPMD) { IRBuilder<> Builder(BB); auto CustomMapperCB = [&](unsigned int I) { return nullptr; }; - auto BodyGenCB = [&](InsertPointTy, - InsertPointTy CodeGenIP) -> InsertPointTy { + auto BodyGenCB = [&](InsertPointTy, InsertPointTy CodeGenIP, + ArrayRef) -> InsertPointTy { Builder.restoreIP(CodeGenIP); return Builder.saveIP(); }; @@ -6753,10 +6791,10 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionSPMD) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), - Builder.saveIP(), Info, EntryInfo, DefaultAttrs, - RuntimeAttrs, /*IfCond=*/nullptr, Inputs, - GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB, - CustomMapperCB, {})); + Builder.saveIP(), {}, Info, EntryInfo, + DefaultAttrs, RuntimeAttrs, /*IfCond=*/nullptr, + Inputs, GenMapInfoCB, BodyGenCB, + SimpleArgAccessorCB, CustomMapperCB, {})); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); @@ -6839,7 +6877,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDeviceSPMD) { auto CustomMapperCB = [&](unsigned int I) { return nullptr; }; auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy, - OpenMPIRBuilder::InsertPointTy CodeGenIP) + OpenMPIRBuilder::InsertPointTy CodeGenIP, + ArrayRef) -> OpenMPIRBuilder::InsertPointTy { Builder.restoreIP(CodeGenIP); OutlinedFn = CodeGenIP.getBlock()->getParent(); @@ -6860,8 +6899,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDeviceSPMD) { ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTarget( - Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, Info, - EntryInfo, DefaultAttrs, RuntimeAttrs, + Loc, 
/*IsOffloadEntry=*/true, EntryIP, EntryIP, {}, + Info, EntryInfo, DefaultAttrs, RuntimeAttrs, /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB, CustomMapperCB, {})); Builder.restoreIP(AfterIP); @@ -6958,8 +6997,9 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) { llvm::Value *RaiseAlloca = nullptr; auto CustomMapperCB = [&](unsigned int I) { return nullptr; }; - auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP, - OpenMPIRBuilder::InsertPointTy CodeGenIP) + auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocIP, + OpenMPIRBuilder::InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) -> OpenMPIRBuilder::InsertPointTy { IRBuilderBase::InsertPointGuard guard(Builder); Builder.SetCurrentDebugLocation(llvm::DebugLoc()); @@ -6985,7 +7025,7 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, - Info, EntryInfo, DefaultAttrs, RuntimeAttrs, + {}, Info, EntryInfo, DefaultAttrs, RuntimeAttrs, /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB, CustomMapperCB, {}, false)); @@ -7062,7 +7102,14 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) { EXPECT_TRUE(isa(Load2)); EXPECT_EQ(Load2, Value); EXPECT_EQ(Load2->getNextNode(), TargetStore); - auto *Deinit = TargetStore->getNextNode(); + + auto *TargetExitBlockBr = TargetStore->getNextNode(); + EXPECT_TRUE(isa(TargetExitBlockBr)); + + auto *TargetExitBlock = TargetExitBlockBr->getSuccessor(0); + EXPECT_EQ(TargetExitBlock->getName(), "target.exit"); + + Instruction *Deinit = &*TargetExitBlock->getFirstNonPHIIt(); EXPECT_NE(Deinit, nullptr); auto *DeinitCall = dyn_cast(Deinit); @@ -7091,8 +7138,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTask) { Value *Val128 = Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load"); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - 
Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { + Builder.restoreIP(AllocIP); AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, "bodygen.alloca128"); @@ -7120,7 +7168,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTask) { OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTask( Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), - BodyGenCB)); + /*DeallocIPs=*/{}, BodyGenCB)); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); Builder.CreateRetVoid(); @@ -7219,7 +7267,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) { F->setName("func"); IRBuilder<> Builder(BB); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { return Error::success(); }; @@ -7231,7 +7280,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) { OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTask( Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), - BodyGenCB)); + /*DeallocIPs=*/{}, BodyGenCB)); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); Builder.CreateRetVoid(); @@ -7254,7 +7303,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) { OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { return Error::success(); }; BasicBlock *AllocaBB = Builder.GetInsertBlock(); @@ -7265,7 +7315,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) { OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTask( Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), - BodyGenCB, + /*DeallocIPs=*/{}, BodyGenCB, /*Tied=*/false)); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); @@ -7290,7 +7340,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) { OMPBuilder.initialize(); 
F->setName("func"); IRBuilder<> Builder(BB); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { return Error::success(); }; BasicBlock *AllocaBB = Builder.GetInsertBlock(); @@ -7308,7 +7359,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) { OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTask( Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), - BodyGenCB, + /*DeallocIPs=*/{}, BodyGenCB, /*Tied=*/false, /*Final*/ nullptr, /*IfCondition*/ nullptr, DDS)); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); @@ -7370,7 +7421,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) { OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { return Error::success(); }; BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); @@ -7381,7 +7433,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) { ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U)); OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, - OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB, + OMPBuilder.createTask(Loc, AllocaIP, /*DeallocIPs=*/{}, + BodyGenCB, /*Tied=*/false, Final)); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); @@ -7428,7 +7481,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) { OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { return Error::success(); }; BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); @@ -7438,10 +7492,10 @@ TEST_F(OpenMPIRBuilderTest, 
CreateTaskIfCondition) { CmpInst::Predicate::ICMP_EQ, F->getArg(0), ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U)); OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); - ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, - OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB, - /*Tied=*/false, /*Final=*/nullptr, - IfCondition)); + ASSERT_EXPECTED_INIT( + OpenMPIRBuilder::InsertPointTy, AfterIP, + OMPBuilder.createTask(Loc, AllocaIP, /*DeallocIPs=*/{}, BodyGenCB, + /*Tied=*/false, /*Final=*/nullptr, IfCondition)); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); Builder.CreateRetVoid(); @@ -7507,8 +7561,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) { Value *InternalStoreInst, *InternalLoad32, *InternalLoad128, *InternalIfCmp; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { + Builder.restoreIP(AllocIP); AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, "bodygen.alloca128"); @@ -7536,7 +7591,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTaskgroup( - Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), + Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), {}, BodyGenCB)); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); @@ -7598,14 +7653,16 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) { F->setName("func"); IRBuilder<> Builder(BB); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { + Builder.restoreIP(AllocIP); AllocaInst *Alloca32 = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, "bodygen.alloca32"); AllocaInst *Alloca64 = Builder.CreateAlloca(Builder.getInt64Ty(), nullptr, 
"bodygen.alloca64"); Builder.restoreIP(CodeGenIP); - auto TaskBodyGenCB1 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto TaskBodyGenCB1 = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { Builder.restoreIP(CodeGenIP); LoadInst *LoadValue = Builder.CreateLoad(Alloca64->getAllocatedType(), Alloca64); @@ -7614,11 +7671,13 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) { return Error::success(); }; OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); - ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP1, - OMPBuilder.createTask(Loc, AllocaIP, TaskBodyGenCB1)); + ASSERT_EXPECTED_INIT( + OpenMPIRBuilder::InsertPointTy, TaskIP1, + OMPBuilder.createTask(Loc, AllocIP, DeallocIPs, TaskBodyGenCB1)); Builder.restoreIP(TaskIP1); - auto TaskBodyGenCB2 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto TaskBodyGenCB2 = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef DeallocIPs) { Builder.restoreIP(CodeGenIP); LoadInst *LoadValue = Builder.CreateLoad(Alloca32->getAllocatedType(), Alloca32); @@ -7627,8 +7686,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) { return Error::success(); }; OpenMPIRBuilder::LocationDescription Loc2(Builder.saveIP(), DL); - ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP2, - OMPBuilder.createTask(Loc2, AllocaIP, TaskBodyGenCB2)); + ASSERT_EXPECTED_INIT( + OpenMPIRBuilder::InsertPointTy, TaskIP2, + OMPBuilder.createTask(Loc2, AllocIP, DeallocIPs, TaskBodyGenCB2)); Builder.restoreIP(TaskIP2); }; @@ -7639,7 +7699,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTaskgroup( - Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), + Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), {}, BODYGENCB_WRAPPER(BodyGenCB))); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); diff --git 
a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp index 6fd266a815dcf..d63e346e31a1d 100644 --- a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp +++ b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp @@ -712,7 +712,7 @@ TEST(CodeExtractor, OpenMPAggregateArgs) { /* AllowVarArgs */ true, /* AllowAlloca */ true, /* AllocationBlock*/ &Func->getEntryBlock(), - /* DeallocationBlock */ nullptr, + /* DeallocationBlocks */ {}, /* Suffix */ ".outlined", /* ArgsInZeroAddressSpace */ true); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 61ddc8339b692..80e052105dc4c 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -67,14 +67,17 @@ convertToScheduleKind(std::optional schedKind) { /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the /// insertion points for allocas. 
-class OpenMPAllocaStackFrame - : public StateStackFrameBase { +class OpenMPAllocStackFrame + : public StateStackFrameBase { public: - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame) - - explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP) - : allocaInsertPoint(allocaIP) {} - llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocStackFrame) + + explicit OpenMPAllocStackFrame( + llvm::OpenMPIRBuilder::InsertPointTy allocIP, + llvm::ArrayRef deallocIPs) + : allocInsertPoint(allocIP), deallocInsertPoints(deallocIPs) {} + llvm::OpenMPIRBuilder::InsertPointTy allocInsertPoint; + llvm::SmallVector deallocInsertPoints; }; /// Stack frame to hold a \see llvm::CanonicalLoopInfo representing the @@ -483,26 +486,33 @@ static LogicalResult handleError(llvm::Expected &result, Operation &op) { /// Find the insertion point for allocas given the current insertion point for /// normal operations in the builder. -static llvm::OpenMPIRBuilder::InsertPointTy -findAllocaInsertPoint(llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { - // If there is an alloca insertion point on stack, i.e. we are in a nested +static llvm::OpenMPIRBuilder::InsertPointTy findAllocInsertPoints( + llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, + llvm::SmallVectorImpl *deallocIPs = + nullptr) { + // If there is an allocation insertion point on stack, i.e. we are in a nested // operation and a specific point was provided by some surrounding operation, // use it. 
- llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; - WalkResult walkResult = moduleTranslation.stackWalk( - [&](OpenMPAllocaStackFrame &frame) { - allocaInsertPoint = frame.allocaInsertPoint; + llvm::OpenMPIRBuilder::InsertPointTy allocInsertPoint; + llvm::ArrayRef deallocInsertPoints; + WalkResult walkResult = moduleTranslation.stackWalk( + [&](OpenMPAllocStackFrame &frame) { + allocInsertPoint = frame.allocInsertPoint; + deallocInsertPoints = frame.deallocInsertPoints; return WalkResult::interrupt(); }); // In cases with multiple levels of outlining, the tree walk might find an - // alloca insertion point that is inside the original function while the - // builder insertion point is inside the outlined function. We need to make - // sure that we do not use it in those cases. + // insertion point that is inside the original function while the builder + // insertion point is inside the outlined function. We need to make sure that + // we do not use it in those cases. if (walkResult.wasInterrupted() && - allocaInsertPoint.getBlock()->getParent() == - builder.GetInsertBlock()->getParent()) - return allocaInsertPoint; + allocInsertPoint.getBlock()->getParent() == + builder.GetInsertBlock()->getParent()) { + if (deallocIPs) + deallocIPs->insert(deallocIPs->end(), deallocInsertPoints.begin(), + deallocInsertPoints.end()); + return allocInsertPoint; + } // Otherwise, insert to the entry block of the surrounding function. // If the current IRBuilder InsertPoint is the function's entry, it cannot @@ -510,7 +520,7 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder, // confusion. Create a new BasicBlock for the Builder and use the entry block // for the allocs. // TODO: Create a dedicated alloca BasicBlock at function creation such that - // we do not need to move the current InertPoint here. + // we do not need to move the current InsertPoint here. 
if (builder.GetInsertBlock() == &builder.GetInsertBlock()->getParent()->getEntryBlock()) { assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() && @@ -522,6 +532,16 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder, builder.SetInsertPoint(entryBB); } + // Collect exit blocks, which is where explicit deallocations should happen in + // this case. + if (deallocIPs) { + for (llvm::BasicBlock &block : *builder.GetInsertBlock()->getParent()) { + llvm::Instruction *terminator = block.getTerminator(); + if (isa_and_present(terminator)) + deallocIPs->emplace_back(&block, terminator->getIterator()); + } + } + llvm::BasicBlock &funcEntryBlock = builder.GetInsertBlock()->getParent()->getEntryBlock(); return llvm::OpenMPIRBuilder::InsertPointTy( @@ -709,7 +729,8 @@ convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, if (failed(checkImplementationStatus(opInst))) return failure(); - auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { + auto bodyGenCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP, + llvm::ArrayRef deallocIPs) { // MaskedOp has only one region associated with it. auto &region = maskedOp.getRegion(); builder.restoreIP(codeGenIP); @@ -753,7 +774,8 @@ convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, if (failed(checkImplementationStatus(opInst))) return failure(); - auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { + auto bodyGenCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP, + llvm::ArrayRef deallocIPs) { // MasterOp has only one region associated with it. 
auto &region = masterOp.getRegion(); builder.restoreIP(codeGenIP); @@ -788,7 +810,8 @@ convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, if (failed(checkImplementationStatus(opInst))) return failure(); - auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { + auto bodyGenCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP, + llvm::ArrayRef deallocIPs) { // CriticalOp has only one region associated with it. auto &region = cast(opInst).getRegion(); builder.restoreIP(codeGenIP); @@ -1048,7 +1071,7 @@ convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, indexVecValues++; } llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + findAllocInsertPoints(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend( ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource)); @@ -1067,7 +1090,8 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, if (failed(checkImplementationStatus(opInst))) return failure(); - auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { + auto bodyGenCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP, + llvm::ArrayRef deallocIPs) { // OrderedOp has only one region associated with it. 
auto &region = cast(opInst).getRegion(); builder.restoreIP(codeGenIP); @@ -1863,7 +1887,7 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, SmallVector reductionDecls; collectReductionDecls(sectionsOp, reductionDecls); llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + findAllocInsertPoints(builder, moduleTranslation); SmallVector privateReductionVariables( sectionsOp.getNumReductionVars()); @@ -1887,7 +1911,8 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, Region &region = sectionOp.getRegion(); auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation]( - InsertPointTy allocaIP, InsertPointTy codeGenIP) { + InsertPointTy allocIP, InsertPointTy codeGenIP, + ArrayRef deallocIPs) { builder.restoreIP(codeGenIP); // map the omp.section reduction block argument to the omp.sections block @@ -1932,7 +1957,7 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, // called for variables which have destructors/finalizers. 
auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); }; - allocaIP = findAllocaInsertPoint(builder, moduleTranslation); + allocaIP = findAllocInsertPoints(builder, moduleTranslation); bool isCancellable = constructIsCancellable(sectionsOp); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = @@ -1961,7 +1986,8 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, if (failed(checkImplementationStatus(*singleOp))) return failure(); - auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { + auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codegenIP, + llvm::ArrayRef deallocIPs) { builder.restoreIP(codegenIP); return convertOmpOpRegions(singleOp.getRegion(), "omp.single.region", builder, moduleTranslation) @@ -2044,7 +2070,7 @@ convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, SmallVector privateReductionVariables(numReductionVars); llvm::ArrayRef isByRef; llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + findAllocInsertPoints(builder, moduleTranslation); // Only do teams reduction if there is no distribute op that captures the // reduction instead. 
@@ -2066,9 +2092,10 @@ convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, return failure(); } - auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { - LLVM::ModuleTranslation::SaveStack frame( - moduleTranslation, allocaIP); + auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codegenIP, + llvm::ArrayRef deallocIPs) { + LLVM::ModuleTranslation::SaveStack frame( + moduleTranslation, allocIP, deallocIPs); builder.restoreIP(codegenIP); return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder, moduleTranslation) @@ -2325,9 +2352,9 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, // code outside of the outlined task region, which is what we want because // this way the initialization and copy regions are executed immediately while // the host variable data are still live. - - llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + llvm::SmallVector deallocIPs; + InsertPointTy allocIP = + findAllocInsertPoints(builder, moduleTranslation, &deallocIPs); // Not using splitBB() because that requires the current block to have a // terminator. @@ -2357,8 +2384,8 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, // Save the alloca insertion point on ModuleTranslation stack for use in // nested regions. 
- LLVM::ModuleTranslation::SaveStack frame( - moduleTranslation, allocaIP); + LLVM::ModuleTranslation::SaveStack frame( + moduleTranslation, allocIP, deallocIPs); // Allocate and initialize private variables builder.SetInsertPoint(initBlock->getTerminator()); @@ -2422,12 +2449,12 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, // Set up for call to createTask() builder.SetInsertPoint(taskStartBlock); - auto bodyCB = [&](InsertPointTy allocaIP, - InsertPointTy codegenIP) -> llvm::Error { + auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codegenIP, + llvm::ArrayRef deallocIPs) -> llvm::Error { // Save the alloca insertion point on ModuleTranslation stack for use in // nested regions. - LLVM::ModuleTranslation::SaveStack frame( - moduleTranslation, allocaIP); + LLVM::ModuleTranslation::SaveStack frame( + moduleTranslation, allocIP, deallocIPs); // translate the body of the task: builder.restoreIP(codegenIP); @@ -2445,7 +2472,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, llvm::IRBuilderBase::InsertPointGuard guard(builder); llvm::Type *llvmAllocType = moduleTranslation.convertType(privDecl.getType()); - builder.SetInsertPoint(allocaIP.getBlock()->getTerminator()); + builder.SetInsertPoint(allocIP.getBlock()->getTerminator()); llvm::Value *llvmPrivateVar = builder.CreateAlloca( llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc"); @@ -2519,7 +2546,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = moduleTranslation.getOpenMPBuilder()->createTask( - ompLoc, allocaIP, bodyCB, !taskOp.getUntied(), + ompLoc, allocIP, deallocIPs, bodyCB, !taskOp.getUntied(), moduleTranslation.lookupValue(taskOp.getFinal()), moduleTranslation.lookupValue(taskOp.getIfExpr()), dds, taskOp.getMergeable(), @@ -2544,18 +2571,21 @@ convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, 
if (failed(checkImplementationStatus(*tgOp))) return failure(); - auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { + auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codegenIP, + llvm::ArrayRef deallocIPs) { builder.restoreIP(codegenIP); return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region", builder, moduleTranslation) .takeError(); }; - InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); + llvm::SmallVector deallocIPs; + InsertPointTy allocIP = + findAllocInsertPoints(builder, moduleTranslation, &deallocIPs); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = - moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP, - bodyCB); + moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocIP, + deallocIPs, bodyCB); if (failed(handleError(afterIP, *tgOp))) return failure(); @@ -2605,8 +2635,9 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, SmallVector reductionDecls; collectReductionDecls(wsloopOp, reductionDecls); + llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + findAllocInsertPoints(builder, moduleTranslation); SmallVector privateReductionVariables( wsloopOp.getNumReductionVars()); @@ -2779,10 +2810,11 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, opInst.getNumReductionVars()); SmallVector deferredStores; - auto bodyGenCB = [&](InsertPointTy allocaIP, - InsertPointTy codeGenIP) -> llvm::Error { + auto bodyGenCB = + [&](InsertPointTy allocIP, InsertPointTy codeGenIP, + llvm::ArrayRef deallocIPs) -> llvm::Error { llvm::Expected afterAllocas = allocatePrivateVars( - opInst, builder, moduleTranslation, privateVarsInfo, allocaIP); + opInst, builder, moduleTranslation, privateVarsInfo, allocIP); if (handleError(afterAllocas, *opInst).failed()) return llvm::make_error(); @@ -2792,12 +2824,11 @@ 
convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, MutableArrayRef reductionArgs = cast(*opInst).getReductionBlockArgs(); - allocaIP = - InsertPointTy(allocaIP.getBlock(), - allocaIP.getBlock()->getTerminator()->getIterator()); + allocIP = InsertPointTy(allocIP.getBlock(), + allocIP.getBlock()->getTerminator()->getIterator()); if (failed(allocReductionVars( - opInst, reductionArgs, builder, moduleTranslation, allocaIP, + opInst, reductionArgs, builder, moduleTranslation, allocIP, reductionDecls, privateReductionVariables, reductionVariableMap, deferredStores, isByRef))) return llvm::make_error(); @@ -2826,8 +2857,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // Save the alloca insertion point on ModuleTranslation stack for use in // nested regions. - LLVM::ModuleTranslation::SaveStack frame( - moduleTranslation, allocaIP); + LLVM::ModuleTranslation::SaveStack frame( + moduleTranslation, allocIP, deallocIPs); // ParallelOp has only one region associated with it. 
llvm::Expected regionBlock = convertOmpOpRegions( @@ -2854,7 +2885,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint = ompBuilder->createReductions( - builder.saveIP(), allocaIP, reductionInfos, isByRef, + builder.saveIP(), allocIP, reductionInfos, isByRef, /*IsNoWait=*/false, /*IsTeamsReduction=*/false); if (!contInsertPoint) return contInsertPoint.takeError(); @@ -2915,13 +2946,15 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, pbKind = getProcBindKind(*bind); bool isCancellable = constructIsCancellable(opInst); - llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + llvm::SmallVector deallocIPs; + llvm::OpenMPIRBuilder::InsertPointTy allocIP = + findAllocInsertPoints(builder, moduleTranslation, &deallocIPs); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = - ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB, - ifCond, numThreads, pbKind, isCancellable); + ompBuilder->createParallel(ompLoc, allocIP, deallocIPs, bodyGenCB, privCB, + finiCB, ifCond, numThreads, pbKind, + isCancellable); if (failed(handleError(afterIP, *opInst))) return failure(); @@ -2966,7 +2999,7 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, assert(isByRef.size() == simdOp.getNumReductionVars()); llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + findAllocInsertPoints(builder, moduleTranslation); llvm::Expected afterAllocas = allocatePrivateVars( simdOp, builder, moduleTranslation, privateVarsInfo, allocaIP); @@ -3308,7 +3341,7 @@ convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + 
findAllocInsertPoints(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); @@ -3335,7 +3368,7 @@ convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + findAllocInsertPoints(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrder()); @@ -3452,7 +3485,7 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, extractAtomicControlFlags(opInst, isIgnoreDenormalMode, isFineGrainedMemory, isRemoteMemory); // Handle ambiguous alloca, if any. - auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); + auto allocaIP = findAllocInsertPoints(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr, @@ -3553,7 +3586,7 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, extractAtomicControlFlags(atomicUpdateOp, isIgnoreDenormalMode, isFineGrainedMemory, isRemoteMemory); // Handle ambiguous alloca, if any. 
- auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); + auto allocaIP = findAllocInsertPoints(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = ompBuilder->createAtomicCapture( @@ -4541,7 +4574,7 @@ createAlteredByCaptureMap(MapInfoData &mapData, if (!isPtrTy) { auto curInsert = builder.saveIP(); llvm::DebugLoc DbgLoc = builder.getCurrentDebugLocation(); - builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation)); + builder.restoreIP(findAllocInsertPoints(builder, moduleTranslation)); auto *memTempAlloc = builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted"); builder.SetCurrentDebugLocation(DbgLoc); @@ -4919,18 +4952,21 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, }; llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + llvm::SmallVector deallocIPs; + llvm::OpenMPIRBuilder::InsertPointTy allocIP = + findAllocInsertPoints(builder, moduleTranslation, &deallocIPs); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() { if (isa(op)) - return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(), + return ompBuilder->createTargetData(ompLoc, allocIP, builder.saveIP(), + deallocIPs, builder.getInt64(deviceID), ifCond, info, genMapInfoCB, customMapperCB, /*MapperFunc=*/nullptr, bodyGenCB, /*DeviceAddrCB=*/nullptr); - return ompBuilder->createTargetData( - ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond, - info, genMapInfoCB, customMapperCB, &RTLFn); + return ompBuilder->createTargetData(ompLoc, allocIP, builder.saveIP(), + deallocIPs, builder.getInt64(deviceID), + ifCond, info, genMapInfoCB, + customMapperCB, &RTLFn); }(); if (failed(handleError(afterIP, *op))) @@ -4966,7 +5002,7 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, 
collectReductionDecls(teamsOp, reductionDecls); llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + findAllocInsertPoints(builder, moduleTranslation); MutableArrayRef reductionArgs = llvm::cast(*teamsOp) @@ -4980,19 +5016,20 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, } using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - auto bodyGenCB = [&](InsertPointTy allocaIP, - InsertPointTy codeGenIP) -> llvm::Error { + auto bodyGenCB = + [&](InsertPointTy allocIP, InsertPointTy codeGenIP, + llvm::ArrayRef deallocIPs) -> llvm::Error { // Save the alloca insertion point on ModuleTranslation stack for use in // nested regions. - LLVM::ModuleTranslation::SaveStack frame( - moduleTranslation, allocaIP); + LLVM::ModuleTranslation::SaveStack frame( + moduleTranslation, allocIP, deallocIPs); // DistributeOp has only one region associated with it. builder.restoreIP(codeGenIP); PrivateVarsInfo privVarsInfo(distributeOp); llvm::Expected afterAllocas = allocatePrivateVars( - distributeOp, builder, moduleTranslation, privVarsInfo, allocaIP); + distributeOp, builder, moduleTranslation, privVarsInfo, allocIP); if (handleError(afterAllocas, opInst).failed()) return llvm::make_error(); @@ -5035,7 +5072,7 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, findCurrentLoopInfo(moduleTranslation); llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP = ompBuilder->applyWorkshareLoop( - ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier, + ompLoc.DL, loopInfo, allocIP, loopNeedsBarrier, convertToScheduleKind(schedule), chunk, isSimd, scheduleMod == omp::ScheduleModifier::monotonic, scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered, @@ -5052,11 +5089,12 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, return llvm::Error::success(); }; - llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + llvm::SmallVector 
deallocIPs; + llvm::OpenMPIRBuilder::InsertPointTy allocIP = + findAllocInsertPoints(builder, moduleTranslation, &deallocIPs); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = - ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB); + ompBuilder->createDistribute(ompLoc, allocIP, deallocIPs, bodyGenCB); if (failed(handleError(afterIP, opInst))) return failure(); @@ -5066,7 +5104,7 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, if (doDistributeReduction) { // Process the reductions if required. return createReductionsAndCleanup( - teamsOp, builder, moduleTranslation, allocaIP, reductionDecls, + teamsOp, builder, moduleTranslation, allocIP, reductionDecls, privateReductionVariables, isByRef, /*isNoWait*/ false, /*isTeamsReduction*/ true); } @@ -5740,7 +5778,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, } using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) + auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP, + ArrayRef deallocIPs) -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy { llvm::IRBuilderBase::InsertPointGuard guard(builder); builder.SetCurrentDebugLocation(llvm::DebugLoc()); @@ -5782,7 +5821,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, llvm::Expected afterAllocas = allocatePrivateVars(targetOp, builder, moduleTranslation, - privateVarsInfo, allocaIP, &mappedPrivateVars); + privateVarsInfo, allocIP, &mappedPrivateVars); if (failed(handleError(afterAllocas, *targetOp))) return llvm::make_error(); @@ -5807,6 +5846,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, return &privatizer.getDeallocRegion(); }); + LLVM::ModuleTranslation::SaveStack frame( + moduleTranslation, allocIP, deallocIPs); llvm::Expected exitBlock = convertOmpOpRegions( targetRegion, "omp.target", builder, moduleTranslation); @@ -5910,8 
+5951,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(), moduleTranslation, dds); - llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + llvm::SmallVector deallocIPs; + llvm::OpenMPIRBuilder::InsertPointTy allocIP = + findAllocInsertPoints(builder, moduleTranslation, &deallocIPs); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::TargetDataInfo info( @@ -5933,9 +5975,10 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = moduleTranslation.getOpenMPBuilder()->createTarget( - ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), info, entryInfo, - defaultAttrs, runtimeAttrs, ifCond, kernelInput, genMapInfoCB, bodyCB, - argAccessorCB, customMapperCB, dds, targetOp.getNowait()); + ompLoc, isOffloadEntry, allocIP, builder.saveIP(), deallocIPs, info, + entryInfo, defaultAttrs, runtimeAttrs, ifCond, kernelInput, + genMapInfoCB, bodyCB, argAccessorCB, customMapperCB, dds, + targetOp.getNowait()); if (failed(handleError(afterIP, opInst))) return failure(); diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir index ca998b4672ba0..c3ce2f62c486e 100644 --- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir @@ -55,21 +55,21 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo // CHECK: define weak_odr protected amdgpu_kernel void @[[FUNC0:.*]]( // CHECK-SAME: ptr %[[TMP:.*]], ptr %[[TMP0:.*]]) #{{[0-9]+}} { // CHECK: %[[TMP1:.*]] = alloca [1 x ptr], align 8, addrspace(5) -// CHECK: %[[TMP2:.*]] = addrspacecast ptr addrspace(5) %[[TMP1]] to ptr -// CHECK: %[[TMP3:.*]] = alloca ptr, align 8, addrspace(5) -// CHECK: %[[TMP4:.*]] = addrspacecast ptr addrspace(5) %[[TMP3]] to ptr -// 
CHECK: store ptr %[[TMP0]], ptr %[[TMP4]], align 8 -// CHECK: %[[TMP5:.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{.*}} to ptr), ptr %[[TMP]]) -// CHECK: %[[EXEC_USER_CODE:.*]] = icmp eq i32 %[[TMP5]], -1 +// CHECK: %[[TMP2:.*]] = alloca ptr, align 8, addrspace(5) +// CHECK: %[[TMP3:.*]] = addrspacecast ptr addrspace(5) %[[TMP2]] to ptr +// CHECK: store ptr %[[TMP0]], ptr %[[TMP3]], align 8 +// CHECK: %[[TMP4:.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{.*}} to ptr), ptr %[[TMP]]) +// CHECK: %[[EXEC_USER_CODE:.*]] = icmp eq i32 %[[TMP4]], -1 // CHECK: br i1 %[[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]] -// CHECK: %[[TMP6:.*]] = load ptr, ptr %[[TMP4]], align 8 +// CHECK: %[[TMP5:.*]] = addrspacecast ptr addrspace(5) %[[TMP1]] to ptr // CHECK: %[[STRUCTARG:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK: %[[TMP6:.*]] = load ptr, ptr %[[TMP3]], align 8 // CHECK: %[[OMP_GLOBAL_THREAD_NUM:.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) // CHECK: %[[GEP_:.*]] = getelementptr { ptr }, ptr %[[STRUCTARG]], i32 0, i32 0 // CHECK: store ptr %[[TMP6]], ptr %[[GEP_]], align 8 -// CHECK: %[[TMP7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[TMP2]], i64 0, i64 0 +// CHECK: %[[TMP7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[TMP5]], i64 0, i64 0 // CHECK: store ptr %[[STRUCTARG]], ptr %[[TMP7]], align 8 -// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr @[[FUNC1:.*]], ptr @[[FUNC1_WRAPPER:.*]], ptr %[[TMP2]], i64 1) +// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr @[[FUNC1:.*]], ptr @[[FUNC1_WRAPPER:.*]], ptr %[[TMP5]], i64 1) // CHECK: call void @__kmpc_free_shared(ptr %[[STRUCTARG]], i64 8) // CHECK: 
call void @__kmpc_target_deinit() diff --git a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir index 5a76871c180ab..3ebb79fef7474 100644 --- a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir @@ -56,7 +56,9 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo // CHECK: %[[B:.*]] = load i32, ptr %[[PTR_B]], align 4 // CHECK: %[[C:.*]] = add i32 %[[A]], %[[B]] // CHECK: store i32 %[[C]], ptr %[[PTR_C]], align 4 -// CHECK: br label %[[LABEL_DEINIT:.*]] +// CHECK: br label %[[LABEL_TARGET_EXIT:.*]] +// CHECK: [[LABEL_TARGET_EXIT]]: +// CHECK-NEXT: br label %[[LABEL_DEINIT:.*]] // CHECK: [[LABEL_DEINIT]]: // CHECK-NEXT: call void @__kmpc_target_deinit() // CHECK-NEXT: ret void diff --git a/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir b/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir index 0ee9230b5af0e..2aa11f3a1aa34 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir @@ -70,4 +70,6 @@ llvm.func @_FortranAAssign(!llvm.ptr, !llvm.ptr, !llvm.ptr, i32) -> !llvm.struct // CHECK: call void @dealloc_foo_1(ptr %[[DESC_TO_DEALLOC]]) // CHECK-NEXT: br label %[[CONT_BLOCK:.*]] // CHECK: [[CONT_BLOCK]]: +// CHECK-NEXT: br label %[[EXIT_BLOCK:.*]] +// CHECK: [[EXIT_BLOCK]]: // CHECK-NEXT: ret void