diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 206ad4a4ef85f..72c2192dc816a 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -31,6 +31,7 @@ namespace llvm { class CanonicalLoopInfo; +class ScanInfo; struct TargetRegionEntryInfo; class OffloadEntriesInfoManager; class OpenMPIRBuilder; @@ -707,6 +708,9 @@ class OpenMPIRBuilder { LLVM_ABI InsertPointOrErrorTy createCancellationPoint( const LocationDescription &Loc, omp::Directive CanceledDirective); + /// Allocates a ScanInfo object owned by this builder and returns its pointer. + Expected scanInfoInitialize(); + /// Generator for '#omp parallel' /// /// \param Loc The insert and source location description. @@ -750,6 +754,42 @@ class OpenMPIRBuilder { LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name = "loop"); + /// Generator for the control flow structure of OpenMP canonical loops if + /// the parent directive has an `inscan` modifier specified. + /// If the `inscan` modifier is specified, the region of the parent is + /// expected to have a `scan` directive. Based on the clauses in + /// scan directive, the body of the loop is split into two loops: Input loop + /// and Scan Loop. Input loop contains the code generated for input phase of + /// scan and Scan loop contains the code generated for scan phase of scan. + /// From the bodyGen callback of these loops, `createScan` would be called + /// when a scan directive is encountered from the loop body. `createScan` + /// based on whether 1. inclusive or exclusive scan is specified and, 2. input + /// loop or scan loop is generated, lowers the body of the for loop + /// accordingly. + /// + /// \param Loc The insert and source location description. + /// \param BodyGenCB Callback that will generate the loop body code. + /// \param Start Value of the loop counter for the first iteration. 
+ /// \param Stop Loop counter values past this will stop the loop. + /// \param Step Loop counter increment after each iteration; negative + /// means counting down. + /// \param IsSigned Whether Start, Stop and Step are signed integers. + /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop + /// counter. + /// \param ComputeIP Insertion point for instructions computing the trip + /// count. Can be used to ensure the trip count is available + /// at the outermost loop of a loop nest. If not set, + /// defaults to the preheader of the generated loop. + /// \param Name Base name used to derive BB and instruction names. + /// \param ScanRedInfo Pointer to the ScanInfo object created using + /// `scanInfoInitialize`. + /// + /// \returns A vector with the input loop's info first, then the scan loop's. + Expected> createCanonicalScanLoops( + const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, + Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, + InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo); + /// Calculate the trip count of a canonical loop. /// /// This allows specifying user-defined loop counter values using increment, @@ -818,13 +858,17 @@ class OpenMPIRBuilder { /// at the outermost loop of a loop nest. If not set, /// defaults to the preheader of the generated loop. /// \param Name Base name used to derive BB and instruction names. + /// \param InScan Whether loop has a scan reduction specified. + /// \param ScanRedInfo Pointer to the ScanInfo object created using + /// `scanInfoInitialize`; must be non-null if \p InScan. /// /// \returns An object representing the created control flow structure which /// can be used for loop-associated directives. 
LLVM_ABI Expected createCanonicalLoop( const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, - InsertPointTy ComputeIP = {}, const Twine &Name = "loop"); + InsertPointTy ComputeIP = {}, const Twine &Name = "loop", + bool InScan = false, ScanInfo *ScanRedInfo = nullptr); /// Collapse a loop nest into a single loop. /// @@ -1556,6 +1600,47 @@ class OpenMPIRBuilder { ArrayRef ReductionInfos, Function *ReduceFn, AttributeList FuncAttrs); + /// Helper function for createCanonicalScanLoops that emits the input loop + /// via \p InputLoopGen and then the scan loop via \p ScanLoopGen. + /// \param InputLoopGen Callback for generating the loop for input phase + /// \param ScanLoopGen Callback for generating the loop for scan phase + /// \param ScanRedInfo Pointer to the ScanInfo object created using + /// `scanInfoInitialize`. + /// + /// \return error if any produced, else return success. + Error emitScanBasedDirectiveIR( + llvm::function_ref InputLoopGen, + llvm::function_ref ScanLoopGen, + ScanInfo *ScanRedInfo); + + /// Creates the basic blocks required for scan reduction. + /// \param ScanRedInfo Pointer to the ScanInfo object created using + /// `scanInfoInitialize`. + void createScanBBs(ScanInfo *ScanRedInfo); + + /// Dynamically allocates the buffer needed for scan reduction. + /// \param AllocaIP The IP where possibly-shared pointer of buffer needs to + /// be declared. + /// \param ScanVars Scan Variables (types given by \p ScanVarsType). + /// \param ScanRedInfo Pointer to the ScanInfo object created using + /// `scanInfoInitialize`. + /// + /// \return error if any produced, else return success. + Error emitScanBasedDirectiveDeclsIR(InsertPointTy AllocaIP, + ArrayRef ScanVars, + ArrayRef ScanVarsType, + ScanInfo *ScanRedInfo); + + /// Copies the result back to the reduction variable and frees the buffer. + /// \param ReductionInfos Array type containing the ReductionOps. 
+ /// \param ScanRedInfo Pointer to the ScanInfo object created using + /// `scanInfoInitialize`. + /// + /// \return error if any produced, else return success. + Error emitScanBasedDirectiveFinalsIR( + ArrayRef ReductionInfos, + ScanInfo *ScanRedInfo); + /// This function emits a helper that gathers Reduce lists from the first /// lane of every active warp to lanes in the first warp. /// @@ -2184,6 +2269,9 @@ class OpenMPIRBuilder { /// free'd. std::forward_list LoopInfos; + /// Collection of owned ScanInfo objects that eventually need to be free'd. + std::forward_list ScanInfos; + /// Add a new region that will be outlined later. void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); } @@ -2639,6 +2727,48 @@ class OpenMPIRBuilder { FinalizeCallbackTy FiniCB, Value *Filter); + /// This function performs the scan reduction of the values updated in + /// the input phase. The reduction logic needs to be emitted between input + /// and scan loop returned by `createCanonicalScanLoops`. The following + /// is the code that is generated, `buffer` and `span` are expected to be + /// populated before executing the generated code. + /// \code{c} + /// for (int k = 0; k != ceil(log2(span)); ++k) { + /// i=pow(2,k) + /// for (size cnt = last_iter; cnt >= i; --cnt) + /// buffer[cnt] op= buffer[cnt-i]; + /// } + /// \endcode + /// \param Loc The insert and source location description. + /// \param ReductionInfos Array type containing the ReductionOps. + /// \param ScanRedInfo Pointer to the ScanInfo object created using + /// `scanInfoInitialize`. + /// + /// \returns The insertion position *after* the scan reduction. + InsertPointOrErrorTy emitScanReduction( + const LocationDescription &Loc, + ArrayRef ReductionInfos, + ScanInfo *ScanRedInfo); + + /// This function splits and directs the control flow to input phase + /// blocks or scan phase blocks based on 1. whether input loop or scan loop + /// is executed, 2. whether exclusive or inclusive scan is used. 
+ /// + /// \param Loc The insert and source location description. + /// \param AllocaIP The IP where the temporary buffer for scan reduction + /// needs to be allocated. + /// \param ScanVars Scan Variables (types given by \p ScanVarsType). + /// \param IsInclusive True for an inclusive scan, false for an exclusive one. + /// \param ScanRedInfo Pointer to the ScanInfo object created using + /// `scanInfoInitialize`. + /// + /// \returns The insertion position *after* the scan. + InsertPointOrErrorTy createScan(const LocationDescription &Loc, + InsertPointTy AllocaIP, + ArrayRef ScanVars, + ArrayRef ScanVarsType, + bool IsInclusive, ScanInfo *ScanRedInfo); + /// Generator for '#omp critical' /// /// \param Loc The insert and source location description. @@ -3774,6 +3904,93 @@ class CanonicalLoopInfo { LLVM_ABI void invalidate(); }; +/// ScanInfo holds the information to assist in lowering of Scan reduction. +/// Before lowering, the body of the for loop specifying scan reduction is +/// expected to have the following structure +/// +/// Loop Body Entry +/// | +/// Code before the scan directive +/// | +/// Scan Directive +/// | +/// Code after the scan directive +/// | +/// Loop Body Exit +/// When `createCanonicalScanLoops` is executed, the bodyGen callback of it +/// transforms the body to: +/// +/// Loop Body Entry +/// | +/// OMPScanDispatch +/// +/// OMPBeforeScanBlock +/// | +/// OMPScanLoopExit +/// | +/// Loop Body Exit +/// +/// The insert point is updated to the first insert point of OMPBeforeScanBlock. +/// It dominates the control flow of code generated until +/// scan directive is encountered and OMPAfterScanBlock dominates the +/// control flow of code generated after scan is encountered. The successor +/// of OMPScanDispatch can be OMPBeforeScanBlock or OMPAfterScanBlock based +/// on 1. whether it is in Input phase or Scan phase, 2. whether it is an +/// exclusive or inclusive scan. This jump is added when `createScan` is +/// executed. 
If input loop is being generated, if it is inclusive scan, +/// `OMPBeforeScanBlock` succeeds `OMPScanDispatch`, if exclusive, +/// `OMPAfterScanBlock` succeeds `OMPScanDispatch` and vice versa for scan loop. +/// At the end of the input loop, temporary buffer is populated and at the +/// beginning of the scan loop, temporary buffer is read. After scan directive +/// is encountered, insertion point is updated to `OMPAfterScanBlock` as it is +/// expected to dominate the code after the scan directive. Both Before and +/// After scan blocks are succeeded by `OMPScanLoopExit`. +/// Temporary buffer allocations are done in `OMPScanInit` block before the +/// lowering of for-loop. The results are copied back to reduction variable in +/// `OMPScanFinish` block. +class ScanInfo { +public: + /// Dominates the body of the loop before scan directive + llvm::BasicBlock *OMPBeforeScanBlock = nullptr; + + /// Dominates the body of the loop after scan directive + llvm::BasicBlock *OMPAfterScanBlock = nullptr; + + /// Controls the flow to before or after scan blocks + llvm::BasicBlock *OMPScanDispatch = nullptr; + + /// Exit block of loop body + llvm::BasicBlock *OMPScanLoopExit = nullptr; + + /// Block before loop body where scan initializations are done + llvm::BasicBlock *OMPScanInit = nullptr; + + /// Block after loop body where scan finalizations are done + llvm::BasicBlock *OMPScanFinish = nullptr; + + /// If true, it indicates Input phase is lowered; else it indicates + /// ScanPhase is lowered + bool OMPFirstScanLoop = false; + + /// Maps the private reduction variable to the pointer of the temporary + /// buffer + llvm::SmallDenseMap *ScanBuffPtrs; + + /// Keeps track of value of iteration variable for input/scan loop to be + /// used for Scan directive lowering + llvm::Value *IV = nullptr; + + /// Stores the span of canonical loop being lowered to be used for temporary + /// buffer allocation or Finalization. 
+ llvm::Value *Span = nullptr; + + ScanInfo() : ScanBuffPtrs(new llvm::SmallDenseMap()) {} + // Non-copyable: the object owns ScanBuffPtrs and frees it on destruction. + ScanInfo(const ScanInfo &) = delete; + ScanInfo &operator=(const ScanInfo &) = delete; + ~ScanInfo() { delete (ScanBuffPtrs); } +}; + } // end namespace llvm #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 840ca8364e218..9f90411b54d00 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4021,6 +4021,340 @@ OpenMPIRBuilder::createMasked(const LocationDescription &Loc, /*Conditional*/ true, /*hasFinalize*/ true); } +static llvm::CallInst *emitNoUnwindRuntimeCall(IRBuilder<> &Builder, + llvm::FunctionCallee Callee, + ArrayRef Args, + const llvm::Twine &Name) { + llvm::CallInst *Call = Builder.CreateCall( + Callee, Args, SmallVector(), Name); + Call->setDoesNotThrow(); + return Call; +} + +// Expects input basic block is dominated by BeforeScanBB. +// Once Scan directive is encountered, the code after scan directive should be +// dominated by AfterScanBB. Scan directive splits the code sequence to +// scan and input phase. Based on whether inclusive or exclusive +// clause is used in the scan directive and whether input loop or scan loop +// is lowered, it adds jumps to input and scan phase. First Scan loop is the +// input loop and second is the scan loop. The code generated handles only +// inclusive scans now. +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan( + const LocationDescription &Loc, InsertPointTy AllocaIP, + ArrayRef ScanVars, ArrayRef ScanVarsType, + bool IsInclusive, ScanInfo *ScanRedInfo) { + if (ScanRedInfo->OMPFirstScanLoop) { + llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars, + ScanVarsType, ScanRedInfo); + if (Err) + return Err; + } + if (!updateToLocation(Loc)) + return Loc.IP; + + llvm::Value *IV = ScanRedInfo->IV; + + if (ScanRedInfo->OMPFirstScanLoop) { + // Emit buffer[i] = red; at the end of the input phase. 
+ for (size_t i = 0; i < ScanVars.size(); i++) { + Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]; + Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr); + Type *DestTy = ScanVarsType[i]; + Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset"); + Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]); + + Builder.CreateStore(Src, Val); + } + } + Builder.CreateBr(ScanRedInfo->OMPScanLoopExit); + emitBlock(ScanRedInfo->OMPScanDispatch, + Builder.GetInsertBlock()->getParent()); + + if (!ScanRedInfo->OMPFirstScanLoop) { + IV = ScanRedInfo->IV; + // Emit red = buffer[i]; at the entrance to the scan phase. + // TODO: if exclusive scan, the red = buffer[i-1] needs to be updated. + for (size_t i = 0; i < ScanVars.size(); i++) { + Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]; + Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr); + Type *DestTy = ScanVarsType[i]; + Value *SrcPtr = + Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset"); + Value *Src = Builder.CreateLoad(DestTy, SrcPtr); + Builder.CreateStore(Src, ScanVars[i]); + } + } + + // TODO: Update it to CreateBr and remove dead blocks + llvm::Value *CmpI = Builder.getInt1(true); + if (ScanRedInfo->OMPFirstScanLoop == IsInclusive) { + Builder.CreateCondBr(CmpI, ScanRedInfo->OMPBeforeScanBlock, + ScanRedInfo->OMPAfterScanBlock); + } else { + Builder.CreateCondBr(CmpI, ScanRedInfo->OMPAfterScanBlock, + ScanRedInfo->OMPBeforeScanBlock); + } + emitBlock(ScanRedInfo->OMPAfterScanBlock, + Builder.GetInsertBlock()->getParent()); + Builder.SetInsertPoint(ScanRedInfo->OMPAfterScanBlock); + return Builder.saveIP(); +} + +Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR( + InsertPointTy AllocaIP, ArrayRef ScanVars, + ArrayRef ScanVarsType, ScanInfo *ScanRedInfo) { + + Builder.restoreIP(AllocaIP); + // Create the shared pointer at alloca IP. 
+ for (size_t i = 0; i < ScanVars.size(); i++) { + llvm::Value *BuffPtr = + Builder.CreateAlloca(Builder.getPtrTy(), nullptr, "vla"); + (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]] = BuffPtr; + } + + // Allocate temporary buffer by master thread + auto BodyGenCB = [&](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP) -> Error { + Builder.restoreIP(CodeGenIP); + Value *AllocSpan = + Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1)); + for (size_t i = 0; i < ScanVars.size(); i++) { + Type *IntPtrTy = Builder.getInt32Ty(); + Constant *Allocsize = ConstantExpr::getSizeOf(ScanVarsType[i]); + Allocsize = ConstantExpr::getTruncOrBitCast(Allocsize, IntPtrTy); + Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize, + AllocSpan, nullptr, "arr"); + Builder.CreateStore(Buff, (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]); + } + return Error::success(); + }; + // TODO: Perform finalization actions for variables. This has to be + // called for variables which have destructors/finalizers. 
+ auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); }; + + Builder.SetInsertPoint(ScanRedInfo->OMPScanInit->getTerminator()); + llvm::Value *FilterVal = Builder.getInt32(0); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal); + + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); + BasicBlock *InputBB = Builder.GetInsertBlock(); + if (InputBB->getTerminator()) + Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator()); + AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier); + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); + + return Error::success(); +} + +Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR( + ArrayRef ReductionInfos, ScanInfo *ScanRedInfo) { + auto BodyGenCB = [&](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP) -> Error { + Builder.restoreIP(CodeGenIP); + for (ReductionInfo RedInfo : ReductionInfos) { + Value *PrivateVar = RedInfo.PrivateVariable; + Value *OrigVar = RedInfo.Variable; + Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[PrivateVar]; + Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr); + + Type *SrcTy = RedInfo.ElementType; + Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, ScanRedInfo->Span, + "arrayOffset"); + Value *Src = Builder.CreateLoad(SrcTy, Val); + + Builder.CreateStore(Src, OrigVar); + Builder.CreateFree(Buff); + } + return Error::success(); + }; + // TODO: Perform finalization actions for variables. This has to be + // called for variables which have destructors/finalizers. 
+ auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); }; + + if (ScanRedInfo->OMPScanFinish->getTerminator()) + Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish->getTerminator()); + else + Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish); + + llvm::Value *FilterVal = Builder.getInt32(0); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal); + + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); + BasicBlock *InputBB = Builder.GetInsertBlock(); + if (InputBB->getTerminator()) + Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator()); + AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier); + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); + return Error::success(); +} + +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction( + const LocationDescription &Loc, + ArrayRef ReductionInfos, + ScanInfo *ScanRedInfo) { + + if (!updateToLocation(Loc)) + return Loc.IP; + auto BodyGenCB = [&](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP) -> Error { + Builder.restoreIP(CodeGenIP); + Function *CurFn = Builder.GetInsertBlock()->getParent(); + // for (int k = 0; k != ceil(log2(n)); ++k) + llvm::BasicBlock *LoopBB = + BasicBlock::Create(CurFn->getContext(), "omp.outer.log.scan.body"); + llvm::BasicBlock *ExitBB = + splitBB(Builder, false, "omp.outer.log.scan.exit"); + llvm::Function *F = llvm::Intrinsic::getOrInsertDeclaration( + Builder.GetInsertBlock()->getModule(), + (llvm::Intrinsic::ID)llvm::Intrinsic::log2, Builder.getDoubleTy()); + llvm::BasicBlock *InputBB = Builder.GetInsertBlock(); + llvm::Value *Arg = + Builder.CreateUIToFP(ScanRedInfo->Span, Builder.getDoubleTy()); + llvm::Value *LogVal = emitNoUnwindRuntimeCall(Builder, F, Arg, ""); + F = llvm::Intrinsic::getOrInsertDeclaration( + Builder.GetInsertBlock()->getModule(), + (llvm::Intrinsic::ID)llvm::Intrinsic::ceil, 
Builder.getDoubleTy()); + LogVal = emitNoUnwindRuntimeCall(Builder, F, LogVal, ""); + LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty()); + llvm::Value *NMin1 = Builder.CreateNUWSub( + ScanRedInfo->Span, + llvm::ConstantInt::get(ScanRedInfo->Span->getType(), 1)); + Builder.SetInsertPoint(InputBB); + Builder.CreateBr(LoopBB); + emitBlock(LoopBB, CurFn); + Builder.SetInsertPoint(LoopBB); + + PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2); + // size pow2k = 1; + PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2); + Counter->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0), + InputBB); + Pow2K->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1), + InputBB); + // for (size i = n - 1; i >= 2 ^ k; --i) + // tmp[i] op= tmp[i-pow2k]; + llvm::BasicBlock *InnerLoopBB = + BasicBlock::Create(CurFn->getContext(), "omp.inner.log.scan.body"); + llvm::BasicBlock *InnerExitBB = + BasicBlock::Create(CurFn->getContext(), "omp.inner.log.scan.exit"); + llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K); + Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); + emitBlock(InnerLoopBB, CurFn); + Builder.SetInsertPoint(InnerLoopBB); + PHINode *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2); + IVal->addIncoming(NMin1, LoopBB); + for (ReductionInfo RedInfo : ReductionInfos) { + Value *ReductionVal = RedInfo.PrivateVariable; + Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ReductionVal]; + Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr); + Type *DestTy = RedInfo.ElementType; + Value *IV = Builder.CreateAdd(IVal, Builder.getInt32(1)); + Value *LHSPtr = + Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset"); + Value *OffsetIval = Builder.CreateNUWSub(IV, Pow2K); + Value *RHSPtr = + Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval, "arrayOffset"); + Value *LHS = Builder.CreateLoad(DestTy, LHSPtr); + Value *RHS = Builder.CreateLoad(DestTy, RHSPtr); + llvm::Value *Result; + InsertPointOrErrorTy AfterIP = + 
RedInfo.ReductionGen(Builder.saveIP(), LHS, RHS, Result); + if (!AfterIP) + return AfterIP.takeError(); + Builder.CreateStore(Result, LHSPtr); + } + llvm::Value *NextIVal = Builder.CreateNUWSub( + IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)); + IVal->addIncoming(NextIVal, Builder.GetInsertBlock()); + CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K); + Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); + emitBlock(InnerExitBB, CurFn); + llvm::Value *Next = Builder.CreateNUWAdd( + Counter, llvm::ConstantInt::get(Counter->getType(), 1)); + Counter->addIncoming(Next, Builder.GetInsertBlock()); + // pow2k <<= 1; + llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true); + Pow2K->addIncoming(NextPow2K, Builder.GetInsertBlock()); + llvm::Value *Cmp = Builder.CreateICmpNE(Next, LogVal); + Builder.CreateCondBr(Cmp, LoopBB, ExitBB); + Builder.SetInsertPoint(ExitBB->getFirstInsertionPt()); + return Error::success(); + }; + + // TODO: Perform finalization actions for variables. This has to be + // called for variables which have destructors/finalizers. + auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); }; + + llvm::Value *FilterVal = Builder.getInt32(0); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal); + + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); + AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier); + + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); + Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo); + if (Err) + return Err; + + return AfterIP; +} + +Error OpenMPIRBuilder::emitScanBasedDirectiveIR( + llvm::function_ref InputLoopGen, + llvm::function_ref ScanLoopGen, + ScanInfo *ScanRedInfo) { + + { + // Emit loop with input phase: + // for (i: 0..) 
{ + // ; + // buffer[i] = red; + // } + ScanRedInfo->OMPFirstScanLoop = true; + Error Err = InputLoopGen(); + if (Err) + return Err; + } + { + // Emit loop with scan phase: + // for (i: 0..) { + // red = buffer[i]; + // ; + // } + ScanRedInfo->OMPFirstScanLoop = false; + Error Err = ScanLoopGen(Builder.saveIP()); + if (Err) + return Err; + } + return Error::success(); +} + +void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) { + Function *Fun = Builder.GetInsertBlock()->getParent(); + ScanRedInfo->OMPScanDispatch = + BasicBlock::Create(Fun->getContext(), "omp.inscan.dispatch"); + ScanRedInfo->OMPAfterScanBlock = + BasicBlock::Create(Fun->getContext(), "omp.after.scan.bb"); + ScanRedInfo->OMPBeforeScanBlock = + BasicBlock::Create(Fun->getContext(), "omp.before.scan.bb"); + ScanRedInfo->OMPScanLoopExit = + BasicBlock::Create(Fun->getContext(), "omp.scan.loop.exit"); +} CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton( DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name) { @@ -4118,6 +4452,76 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, return CL; } +Expected OpenMPIRBuilder::scanInfoInitialize() { + ScanInfos.emplace_front(); + ScanInfo *Result = &ScanInfos.front(); + return Result; +} + +Expected> +OpenMPIRBuilder::createCanonicalScanLoops( + const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, + Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, + InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo) { + LocationDescription ComputeLoc = + ComputeIP.isSet() ? 
LocationDescription(ComputeIP, Loc.DL) : Loc; + updateToLocation(ComputeLoc); + + SmallVector Result; + + Value *TripCount = calculateCanonicalLoopTripCount( + ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name); + ScanRedInfo->Span = TripCount; + ScanRedInfo->OMPScanInit = splitBB(Builder, true, "scan.init"); + Builder.SetInsertPoint(ScanRedInfo->OMPScanInit); + + auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) { + Builder.restoreIP(CodeGenIP); + ScanRedInfo->IV = IV; + createScanBBs(ScanRedInfo); + BasicBlock *InputBlock = Builder.GetInsertBlock(); + Instruction *Terminator = InputBlock->getTerminator(); + assert(Terminator->getNumSuccessors() == 1); + BasicBlock *ContinueBlock = Terminator->getSuccessor(0); + Terminator->setSuccessor(0, ScanRedInfo->OMPScanDispatch); + emitBlock(ScanRedInfo->OMPBeforeScanBlock, + Builder.GetInsertBlock()->getParent()); + Builder.CreateBr(ScanRedInfo->OMPScanLoopExit); + emitBlock(ScanRedInfo->OMPScanLoopExit, + Builder.GetInsertBlock()->getParent()); + Builder.CreateBr(ContinueBlock); + Builder.SetInsertPoint( + ScanRedInfo->OMPBeforeScanBlock->getFirstInsertionPt()); + return BodyGenCB(Builder.saveIP(), IV); + }; + + const auto &&InputLoopGen = [&]() -> Error { + Expected LoopInfo = createCanonicalLoop( + Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop, + ComputeIP, Name, true, ScanRedInfo); + if (!LoopInfo) + return LoopInfo.takeError(); + Result.push_back(*LoopInfo); + Builder.restoreIP((*LoopInfo)->getAfterIP()); + return Error::success(); + }; + const auto &&ScanLoopGen = [&](LocationDescription Loc) -> Error { + Expected LoopInfo = + createCanonicalLoop(Loc, BodyGen, Start, Stop, Step, IsSigned, + InclusiveStop, ComputeIP, Name, true, ScanRedInfo); + if (!LoopInfo) + return LoopInfo.takeError(); + Result.push_back(*LoopInfo); + Builder.restoreIP((*LoopInfo)->getAfterIP()); + ScanRedInfo->OMPScanFinish = Builder.GetInsertBlock(); + return Error::success(); + }; + Error Err = 
emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo); + if (Err) + return Err; + return Result; +} + Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount( const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, const Twine &Name) { @@ -4181,7 +4585,8 @@ Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount( Expected OpenMPIRBuilder::createCanonicalLoop( const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, - InsertPointTy ComputeIP, const Twine &Name) { + InsertPointTy ComputeIP, const Twine &Name, bool InScan, + ScanInfo *ScanRedInfo) { LocationDescription ComputeLoc = ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc; @@ -4192,6 +4597,8 @@ Expected OpenMPIRBuilder::createCanonicalLoop( Builder.restoreIP(CodeGenIP); Value *Span = Builder.CreateMul(IV, Step); Value *IndVar = Builder.CreateAdd(Span, Start); + if (InScan) + ScanRedInfo->IV = IndVar; return BodyGenCB(Builder.saveIP(), IndVar); }; LocationDescription LoopLoc = diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index d6b578aa8ffd1..b7a060bb3563d 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -23,6 +23,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include #include using namespace llvm; @@ -5360,6 +5361,144 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { EXPECT_TRUE(findGEPZeroOne(ReductionFn->getArg(1), FirstRHS, SecondRHS)); } +static void createScan(llvm::Value *scanVar, llvm::Type *scanType, + OpenMPIRBuilder &OMPBuilder, IRBuilder<> &Builder, + OpenMPIRBuilder::LocationDescription Loc, + OpenMPIRBuilder::InsertPointTy &allocaIP, + ScanInfo *&ScanRedInfo) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + 
ASSERT_EXPECTED_INIT(InsertPointTy, retIp, + OMPBuilder.createScan(Loc, allocaIP, {scanVar}, + {scanType}, true, ScanRedInfo)); + Builder.restoreIP(retIp); +} +/* + Following is the pseudocode of the code generated by the test case + ptr + size num_iters = 100 + // temp buffer allocation + omp masked { + buff = malloc(num_iters*scanvarstype) + *ptr = buff + } + barrier; + // input phase loop + for (i: 0..) { + ; + buffer = *ptr; + buffer[i] = red; + } + // scan reduction + omp masked + { + for (int k = 0; k != ceil(log2(num_iters)); ++k) { + i=pow(2,k) + for (size cnt = last_iter; cnt >= i; --cnt) { + buffer = *ptr; + buffer[cnt] op= buffer[cnt-i]; + } + } + } + barrier; + // scan phase loop + for (0..) { + buffer = *ptr; + red = buffer[i] ; + ; + } + // temp buffer deletion + omp masked { + free(*ptr) + } + barrier; +*/ +TEST_F(OpenMPIRBuilderTest, ScanReduction) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + IRBuilder<> Builder(BB); + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + Value *TripCount = F->getArg(0); + Type *LCTy = TripCount->getType(); + Value *StartVal = ConstantInt::get(LCTy, 1); + Value *StopVal = ConstantInt::get(LCTy, 100); + Value *Step = ConstantInt::get(LCTy, 1); + auto AllocaIP = Builder.saveIP(); + + llvm::Value *ScanVar = Builder.CreateAlloca(Builder.getFloatTy()); + llvm::Value *OrigVar = Builder.CreateAlloca(Builder.getFloatTy()); + unsigned NumBodiesGenerated = 0; + ScanInfo *ScanRedInfo; + ASSERT_EXPECTED_INIT(ScanInfo *, ScanInformation, + OMPBuilder.scanInfoInitialize()); + ScanRedInfo = ScanInformation; + auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) { + NumBodiesGenerated += 1; + Builder.restoreIP(CodeGenIP); + createScan(ScanVar, Builder.getFloatTy(), OMPBuilder, Builder, Loc, + AllocaIP, ScanRedInfo); + return Error::success(); + }; + llvm::SmallVector loops; + ASSERT_EXPECTED_INIT(llvm::SmallVector, loopvec, + 
OMPBuilder.createCanonicalScanLoops( + Loc, LoopBodyGenCB, StartVal, StopVal, Step, false, + false, Builder.saveIP(), "scan", ScanRedInfo)); + loops = loopvec; + CanonicalLoopInfo *InputLoop = loops.front(); + CanonicalLoopInfo *ScanLoop = loops.back(); + Builder.restoreIP(ScanLoop->getAfterIP()); + InputLoop->assertOK(); + ScanLoop->assertOK(); + + EXPECT_EQ(ScanLoop->getAfter(), Builder.GetInsertBlock()); + EXPECT_EQ(NumBodiesGenerated, 2U); + SmallVector ReductionInfos = { + {Builder.getFloatTy(), OrigVar, ScanVar, + /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction, + /*ReductionGenClang=*/nullptr, sumAtomicReduction}}; + OpenMPIRBuilder::LocationDescription RedLoc({InputLoop->getAfterIP(), DL}); + llvm::BasicBlock *Cont = splitBB(Builder, false, "omp.scan.loop.cont"); + ASSERT_EXPECTED_INIT( + InsertPointTy, retIp, + OMPBuilder.emitScanReduction(RedLoc, ReductionInfos, ScanRedInfo)); + Builder.restoreIP(retIp); + Builder.CreateBr(Cont); + Builder.SetInsertPoint(Cont); + unsigned NumMallocs = 0; + unsigned NumFrees = 0; + unsigned NumMasked = 0; + unsigned NumEndMasked = 0; + unsigned NumLog = 0; + unsigned NumCeil = 0; + for (Instruction &I : instructions(F)) { + if (!isa(I)) + continue; + CallInst *Call = cast(&I); + StringRef Name = Call->getCalledFunction()->getName(); + if (Name.equals_insensitive("malloc")) { + NumMallocs += 1; + } else if (Name.equals_insensitive("free")) { + NumFrees += 1; + } else if (Name.equals_insensitive("__kmpc_masked")) { + NumMasked += 1; + } else if (Name.equals_insensitive("__kmpc_end_masked")) { + NumEndMasked += 1; + } else if (Name.equals_insensitive("llvm.log2.f64")) { + NumLog += 1; + } else if (Name.equals_insensitive("llvm.ceil.f64")) { + NumCeil += 1; + } + } + EXPECT_EQ(NumBodiesGenerated, 2U); + EXPECT_EQ(NumMasked, 3U); + EXPECT_EQ(NumEndMasked, 3U); + EXPECT_EQ(NumMallocs, 1U); + EXPECT_EQ(NumFrees, 1U); + EXPECT_EQ(NumLog, 1U); + EXPECT_EQ(NumCeil, 1U); +} + TEST_F(OpenMPIRBuilderTest, 
CreateTwoReductions) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M);